From 4e61d0a260fa67b3913191c51583f74da2e47833 Mon Sep 17 00:00:00 2001
From: LiLittleCat
Date: Mon, 20 Mar 2023 22:10:46 +0800
Subject: [PATCH] feat: add script to convert urls in json file

---
 .github/workflows/convert_to_json.yml | 27 +++++++++++++++++++++++++++
 convert.py                            | 25 +++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 .github/workflows/convert_to_json.yml
 create mode 100644 convert.py

diff --git a/.github/workflows/convert_to_json.yml b/.github/workflows/convert_to_json.yml
new file mode 100644
index 0000000..572cf85
--- /dev/null
+++ b/.github/workflows/convert_to_json.yml
@@ -0,0 +1,27 @@
+name: Run Python Script on Main Branch
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  run_script:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.8"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # Add any other dependencies your script requires here
+
+      - name: Run Python script
+        run: python convert.py
diff --git a/convert.py b/convert.py
new file mode 100644
index 0000000..3e1b8a2
--- /dev/null
+++ b/convert.py
@@ -0,0 +1,25 @@
+# Convert the URLs in README.md to JSON format and save them to urls.json:
+# collapse duplicate URLs and remove the trailing `/` from each URL.
+
+import json
+import re
+
+# Read the content of README.md
+with open("README.md", "r", encoding="utf-8") as file:
+    content = file.read()
+
+# Find all URLs that are immediately followed by `]`; the lookahead checks
+# for that `]` without consuming it, so it is not part of the returned URL.
+urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+(?=\])', content)
+
+# Remove the last '/' in the URL and collapse the same URLs, keeping the
+# order in which each URL first appears.
+unique_urls = []
+for url in urls:
+    url = url[:-1] if url.endswith('/') else url
+    if url not in unique_urls:
+        unique_urls.append(url)
+
+# Save the URLs to urls.json
+with open("urls.json", "w", encoding="utf-8") as file:
+    json.dump(unique_urls, file)