41 changes: 41 additions & 0 deletions .github/workflows/tag_archived.yml
@@ -0,0 +1,41 @@
name: Tag Archived Repos

on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * 0' # every Sunday at 00:00 UTC

jobs:
  tag_archived:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Run script and capture output
        id: run_script
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          BATCH_SIZE: 50
        run: |
          BODY=$(python tag_archived.py)
          echo "commit_body<<EOF" >> $GITHUB_OUTPUT
          echo "$BODY" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT

      - name: Commit changes
        uses: stefanzweifel/git-auto-commit-action@v6
        with:
          commit_message: |
            chore: Update package lists

            ${{ steps.run_script.outputs.commit_body }}
          branch: master
          file_pattern: packages.json deleted_packages.json
          commit_user_name: "github-actions[bot]"
          commit_user_email: "github-actions[bot]@users.noreply.github.com"
          commit_author: "github-actions[bot] <github-actions[bot]@users.noreply.github.com>"
47 changes: 30 additions & 17 deletions packages.json
@@ -25219,23 +25219,6 @@
"license": "MIT",
"web": "https://github.com/schneiderfelipe/hyperscript"
},
{
"name": "pl0t",
"url": "https://github.com/al6x/pl0t?subdir=api/nim",
"method": "git",
"tags": [
"plot",
"chart",
"table",
"excel",
"spreadsheet",
"visualization",
"data"
],
"description": "Plot and visualize data",
"license": "Proprietary",
"web": "https://pl0t.com"
},
{
"name": "gm_api",
"url": "https://github.com/thisago/gm_api",
@@ -36036,5 +36019,35 @@
"description": "A CLI library inspired by Ratatui",
"license": "MIT",
"web": "https://github.com/fox0430/celina"
},
{
"name": "cglm",
"url": "https://github.com/Niminem/cglm",
"method": "git",
"tags": [
"cglm",
"glm",
"math",
"3d",
"game",
"wrapper"
],
"description": "Nim wrapper for cglm, an optimized 3D math library written in C99",
"license": "MIT",
"web": "https://github.com/Niminem/cglm"
},
{
"name": "prettyterm",
"url": "https://github.com/CodeLibraty/prettyterm",
"method": "git",
"tags": [
"terminal",
"tui",
"utils",
"rytonlang"
],
"description": "Make your terminal interfaces prettier!",
"license": "MIT",
"web": "https://github.com/CodeLibraty/prettyterm"
}
]
153 changes: 153 additions & 0 deletions tag_archived.py
@@ -0,0 +1,153 @@
import json
import os
import subprocess
import sys
from urllib.parse import urlparse

def get_repo_from_url(url):
    """Extracts owner/repo from a GitHub URL, stripping query parameters."""
    # Use urlparse to handle URL components correctly
    parsed_url = urlparse(url)
    if parsed_url.netloc == "github.com":
        path_parts = parsed_url.path.strip('/').split('/')
        if len(path_parts) >= 2:
            owner = path_parts[0]
            repo = path_parts[1]
            if repo.endswith('.git'):
                repo = repo[:-4]
            return owner, repo
    return None, None
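
# Example (illustrative): urlparse keeps the query string out of the path, so
# get_repo_from_url("https://github.com/al6x/pl0t?subdir=api/nim") returns
# ("al6x", "pl0t"), while non-GitHub URLs return (None, None).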

def build_graphql_query(repos):
    """Builds a GraphQL query for a batch of repositories."""
    query_parts = []
    for i, (owner, repo) in enumerate(repos):
        query_parts.append(f"""
        repo{i}: repository(owner: "{owner}", name: "{repo}") {{
            isArchived
            nameWithOwner
        }}
        """)
    return "query {" + "".join(query_parts) + "}"

def run_gh_query(query):
    """Runs a GraphQL query using the gh CLI."""
    cmd = ["gh", "api", "graphql", "-f", f"query={query}"]
    result = subprocess.run(cmd, capture_output=True, text=True)

    # Even with a non-zero exit code, gh may still print JSON with partial
    # data and an 'errors' field, so try to parse stdout regardless.
    if not result.stdout:
        print(f"Error running gh command: empty stdout. stderr: {result.stderr}", file=sys.stderr)
        # Return an empty dict (no "data" key) so the caller skips this batch
        # instead of treating every repo in it as deleted.
        return {}

    try:
        response_json = json.loads(result.stdout)
    except json.JSONDecodeError:
        print(f"Error running gh command: failed to parse JSON. stderr: {result.stderr}", file=sys.stderr)
        return {}

    if "errors" in response_json:
        # Filter out expected "NOT_FOUND" errors to avoid log spam.
        critical_errors = [e for e in response_json.get("errors", []) if e.get("type") != "NOT_FOUND"]
        if critical_errors:
            print(f"GraphQL query returned critical errors: {critical_errors}", file=sys.stderr)

    return response_json
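
# A successful response has the shape:
#   {"data": {"repo0": {"isArchived": false, "nameWithOwner": "owner/name"},
#             "repo1": null, ...}}
# where a null entry (paired with a NOT_FOUND error) means the repository
# no longer exists or is inaccessible.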

def main():
    """Main function."""
    batch_size = int(os.environ.get("BATCH_SIZE", 50))

    with open("packages.json", "r") as f:
        all_packages = json.load(f)

    # Partition packages: those already marked 'deleted' vs. those to be checked.
    packages_to_check = []
    deleted_packages = []
    for pkg in all_packages:
        if "deleted" in pkg.get("tags", []):
            deleted_packages.append(pkg)
        else:
            packages_to_check.append(pkg)

    # Identify GitHub repos to query from the packages to be checked.
    github_repos_map = {}
    for pkg in packages_to_check:
        owner, repo = get_repo_from_url(pkg.get("url", ""))
        if owner and repo:
            name_with_owner = f"{owner}/{repo}"
            # Handle cases where multiple packages point to the same repo.
            if name_with_owner not in github_repos_map:
                github_repos_map[name_with_owner] = []
            github_repos_map[name_with_owner].append(pkg)

    # Batch query the GitHub API.
    repos_to_query = list(github_repos_map.keys())
    api_results = {}
    for i in range(0, len(repos_to_query), batch_size):
        batch_repos_str = repos_to_query[i:i+batch_size]
        batch_repos_tuple = [tuple(r.split('/')) for r in batch_repos_str]
        query = build_graphql_query(batch_repos_tuple)
        result = run_gh_query(query)
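        # Map positional aliases (repo0, repo1, ...) back to "owner/name" keys:
        # the alias index mirrors the enumerate() order that
        # build_graphql_query used for this batch.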
if "data" in result and result.get("data") is not None:
for j, repo_str in enumerate(batch_repos_str):
key = f"repo{j}"
api_results[repo_str] = result["data"].get(key)

# Process API results.
newly_deleted_names = []
newly_archived_names = []
active_packages = []

# Start with a clean list of packages to check
remaining_packages = list(packages_to_check)

for repo_str, repo_data in api_results.items():
packages_to_update = github_repos_map[repo_str]
for pkg in packages_to_update:
if repo_data is None: # Repo not found, move to deleted.
if pkg in remaining_packages:
deleted_packages.append(pkg)
remaining_packages.remove(pkg)
newly_deleted_names.append(pkg['name'])
elif repo_data.get("isArchived"): # Repo is archived, tag it.
pkg_tags = set(pkg.get("tags", []))
if "archived" not in pkg_tags:
pkg_tags.add("archived")
pkg["tags"] = sorted(list(pkg_tags))
newly_archived_names.append(pkg['name'])

active_packages = remaining_packages

# Write output files if changes were made.
if newly_deleted_names or newly_archived_names:
# Write active packages to packages.json
with open("packages.json", "w") as f:
json.dump(active_packages, f, indent=2, ensure_ascii=False)
f.write('\n')

# Write deleted packages to deleted_packages.json
if deleted_packages:
with open("deleted_packages.json", "w") as f:
json.dump(deleted_packages, f, indent=2, ensure_ascii=False)
f.write('\n')

# Print summary for commit message.
if newly_deleted_names:
print("Moved to deleted_packages.json:")
for name in sorted(list(set(newly_deleted_names))):
print(f"- {name}")
if newly_archived_names:
if newly_deleted_names:
print() # Add a newline for separation.
print("Tagged as archived:")
for name in sorted(list(set(newly_archived_names))):
print(f"- {name}")
else:
print("No new archived or deleted repositories found.")

if __name__ == "__main__":
main()