From 5ec7f5883157645b14269f124b727b64a2cc6aa2 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sun, 11 Jul 2021 23:33:19 +0200 Subject: [PATCH] tests/scripts/download-test-fonts.sh rewrite in Python3 Replaces download-test-fonts.sh with download-test-fonts.py which does the same work, and also avoids downloading anything if the files are already installed with the right content. Now uses the first 8 bytes of each file's sha256 hash for the digest. --- ChangeLog | 12 ++ tests/README.md | 2 +- tests/scripts/download-test-fonts.py | 293 +++++++++++++++++++++++++++ tests/scripts/download-test-fonts.sh | 66 ------ 4 files changed, 306 insertions(+), 67 deletions(-) create mode 100755 tests/scripts/download-test-fonts.py delete mode 100755 tests/scripts/download-test-fonts.sh diff --git a/ChangeLog b/ChangeLog index 751fdd41f..790ef56f0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2021-07-15 David Turner + + Replaces download-test-fonts.sh with download-test-fonts.py which + does the same work, and also avoids downloading anything if the + files are already installed with the right content. + + Now uses the first 8 bytes of each file's sha256 hash for the digest. + + * tests/scripts/download-test-fonts.sh: Removed + * tests/scripts/download-test-fonts.py: New script + * tests/README.md: Updated + 2021-07-15 Alex Richardson Support architectures where `long` is smaller than pointers. diff --git a/tests/README.md b/tests/README.md index f6f861182..0d0b99aa8 100644 --- a/tests/README.md +++ b/tests/README.md @@ -4,7 +4,7 @@ ### Download test fonts -Run the `tests/scripts/download-fonts.sh` script, which will +Run the `tests/scripts/download-test-fonts.py` script, which will download test fonts to the `tests/data/` directory first. 
### Build the test programs
diff --git a/tests/scripts/download-test-fonts.py b/tests/scripts/download-test-fonts.py
new file mode 100755
index 000000000..cab133daf
--- /dev/null
+++ b/tests/scripts/download-test-fonts.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python3
+
+"""Download test fonts used by the FreeType regression test programs.
+These will be copied to $FREETYPE/tests/data/ by default.
+"""
+
+import argparse
+import collections
+import hashlib
+import io
+import os
+import requests
+import sys
+import zipfile
+
+from typing import Callable, List, Optional, Tuple
+
+# The list of download items describing the font files to install.
+# Each download item is a dictionary with one of the following schemas:
+#
+# - File item:
+#
+#   file_url
+#     Type: URL string.
+#     Required: Yes.
+#     Description: URL to download the file from.
+#
+#   install_name
+#     Type: file name string
+#     Required: No
+#     Description: Installation name for the font file, only provided if it
+#       must be different from the original URL's basename.
+#
+#   hex_digest
+#     Type: hexadecimal string
+#     Required: No
+#     Description: Digest of the input font file.
+#
+# - Zip items:
+#
+#   These items correspond to one or more font files that are embedded in a
+#   remote zip archive. Each entry has the following fields:
+#
+#   zip_url
+#     Type: URL string.
+#     Required: Yes.
+#     Description: URL to download the zip archive from.
+#
+#   zip_files
+#     Type: List of file entries (see below)
+#     Required: Yes
+#     Description: A list of entries, each describing a single font file to be
+#       extracted from the archive
+#
+# Apart from that, some schemas are used for dictionaries used inside download
+# items:
+#
+# - File entries:
+#
+#   These are dictionaries describing a single font file to extract from an archive.
+#
+#   filename
+#     Type: file path string
+#     Required: Yes
+#     Description: Path of source file, relative to the archive's top-level directory.
+#
+#   install_name
+#     Type: file name string
+#     Required: No
+#     Description: Installation name for the font file, only provided if it must be
+#       different from the original filename value.
+#
+#   hex_digest
+#     Type: hexadecimal string
+#     Required: No
+#     Description: Digest of the input source file
+#
+_DOWNLOAD_ITEMS = [
+    {
+        "zip_url": "https://github.com/python-pillow/Pillow/files/6622147/As.I.Lay.Dying.zip",
+        "zip_files": [
+            {
+                "filename": "As I Lay Dying.ttf",
+                "install_name": "As.I.Lay.Dying.ttf",
+                "hex_digest": "ef146bbc2673b387",
+            },
+        ],
+    },
+]
+
+
+def digest_data(data: bytes) -> bytes:
+    """Return the digest of a byte string.
+
+    The digest is the first 8 bytes of the input's sha256 hash.
+    """
+    return hashlib.sha256(data).digest()[:8]
+
+
+def check_existing(path: str, hex_digest: Optional[str]) -> bool:
+    """Return True if |path| exists and its content matches |hex_digest|.
+
+    Always return False if |hex_digest| is None, since the content cannot be
+    verified in that case.
+    """
+    if hex_digest is None or not os.path.exists(path):
+        return False
+
+    with open(path, "rb") as f:
+        existing_content = f.read()
+
+    return bytes.fromhex(hex_digest) == digest_data(existing_content)
+
+
+def install_file(content: bytes, dest_path: str) -> None:
+    """Write a byte string to a given destination file.
+
+    Missing parent directories are created as needed.
+
+    Args:
+      content: Input data, as a byte string
+      dest_path: Installation path
+    """
+    parent_path = os.path.dirname(dest_path)
+    # dirname() is empty for a bare file name, and makedirs("") would raise.
+    if parent_path:
+        os.makedirs(parent_path, exist_ok=True)
+
+    with open(dest_path, "wb") as f:
+        f.write(content)
+
+
+def download_file(url: str, expected_digest: Optional[bytes] = None) -> bytes:
+    """Download a file from a given URL.
+
+    Args:
+      url: Input URL
+      expected_digest: Optional digest of the file
+        as a byte string
+    Returns:
+      URL content as binary string.
+    Raises:
+      requests.HTTPError if the server answers with an error status.
+      ValueError if expected_digest is not None and does not match the content.
+    """
+    # Without a timeout, requests may wait forever on a stalled connection.
+    r = requests.get(url, allow_redirects=True, timeout=60)
+    # Fail on 4xx/5xx rather than installing an error page as a font file.
+    r.raise_for_status()
+    content = r.content
+    if expected_digest is not None:
+        digest = digest_data(content)
+        if digest != expected_digest:
+            raise ValueError(
+                "%s has invalid digest %s (expected %s)"
+                % (url, digest.hex(), expected_digest.hex())
+            )
+
+    return content
+
+
+def extract_file_from_zip_archive(
+    archive: zipfile.ZipFile,
+    archive_name: str,
+    filepath: str,
+    expected_digest: Optional[bytes] = None,
+) -> bytes:
+    """Extract a file from a given zipfile.ZipFile archive.
+
+    Args:
+      archive: Input ZipFile object.
+      archive_name: Archive name or URL, only used to generate a human-readable error
+        message.
+      filepath: Input filepath in archive.
+      expected_digest: Optional digest for the file.
+    Returns:
+      The content of the extracted file, as a byte string.
+    Raises:
+      KeyError if filepath is not a member of the archive.
+      ValueError if expected_digest is not None and does not match the extracted file.
+    """
+    # Read the member only once; ZipFile.read() closes its handle when done.
+    content = archive.read(filepath)
+    if expected_digest is not None:
+        digest = digest_data(content)
+        if digest != expected_digest:
+            raise ValueError(
+                "%s in zip archive at %s has invalid digest %s (expected %s)"
+                % (filepath, archive_name, digest.hex(), expected_digest.hex())
+            )
+    return content
+
+
+def _get_and_install_file(
+    install_path: str,
+    hex_digest: Optional[str],
+    force_download: bool,
+    get_content: Callable[[], bytes],
+) -> bool:
+    """Install a file, unless it is already installed with the right content.
+
+    Args:
+      install_path: Installation path.
+      hex_digest: Optional expected digest, as a hexadecimal string.
+      force_download: Set to True to skip the check of the installed file.
+      get_content: Callable returning the content to install. Only called if
+        the file must be installed.
+    Returns:
+      True if the file was installed, False if it was already up-to-date.
+    """
+    if not force_download and check_existing(install_path, hex_digest):
+        return False
+
+    install_file(get_content(), install_path)
+    return True
+
+
+def download_and_install_item(
+    item: dict, install_dir: str, force_download: bool
+) -> List[Tuple[str, bool]]:
+    """Download and install one item.
+
+    Args:
+      item: Download item as a dictionary, see above for schema.
+      install_dir: Installation directory.
+      force_download: Set to True to force download and installation, even if
+        the font file is already installed with the right content.
+
+    Returns:
+      A list of (install_name, status) tuples, where 'install_name' is the file's
+      installation name under 'install_dir', and 'status' is a boolean that is True
+      to indicate that the file was downloaded and installed, or False to indicate that
+      the file is already installed with the right content.
+
+    Raises:
+      ValueError if the item matches no known schema, or if a digest check fails.
+    """
+    if "file_url" in item:
+        file_url = item["file_url"]
+        install_name = item.get("install_name", os.path.basename(file_url))
+        install_path = os.path.join(install_dir, install_name)
+        hex_digest = item.get("hex_digest")
+        # download_file() expects the digest as raw bytes, not as a hex string.
+        expected_digest = bytes.fromhex(hex_digest) if hex_digest else None
+
+        def get_content():
+            return download_file(file_url, expected_digest)
+
+        status = _get_and_install_file(
+            install_path, hex_digest, force_download, get_content
+        )
+        return [(install_name, status)]
+
+    if "zip_url" in item:
+        # One or more files from a zip archive.
+        archive_url = item["zip_url"]
+        archive = zipfile.ZipFile(io.BytesIO(download_file(archive_url)))
+
+        result = []
+        for f in item["zip_files"]:
+            filename = f["filename"]
+            install_name = f.get("install_name", filename)
+            hex_digest = f.get("hex_digest")
+
+            def get_content():
+                return extract_file_from_zip_archive(
+                    archive,
+                    archive_url,
+                    filename,
+                    bytes.fromhex(hex_digest) if hex_digest else None,
+                )
+
+            status = _get_and_install_file(
+                os.path.join(install_dir, install_name),
+                hex_digest,
+                force_download,
+                get_content,
+            )
+            result.append((install_name, status))
+
+        return result
+
+    else:
+        raise ValueError("Unknown download item schema: %s" % item)
+
+
+def main():
+    """Install all download items, as directed by the command line."""
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    # Assume this script is under tests/scripts/ and tests/data/
+    # is the default installation directory.
+    install_dir = os.path.normpath(
+        os.path.join(os.path.dirname(__file__), "..", "data")
+    )
+
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        default=False,
+        help="Force download and installation of font files",
+    )
+
+    parser.add_argument(
+        "--install-dir",
+        default=install_dir,
+        help="Specify installation directory [%s]" % install_dir,
+    )
+
+    args = parser.parse_args()
+
+    for item in _DOWNLOAD_ITEMS:
+        for install_name, status in download_and_install_item(
+            item, args.install_dir, args.force
+        ):
+            print("%s %s" % (install_name, "INSTALLED" if status else "UP-TO-DATE"))
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/scripts/download-test-fonts.sh b/tests/scripts/download-test-fonts.sh
deleted file mode 100755
index 1158f10c5..000000000
--- a/tests/scripts/download-test-fonts.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/bash
-# Download test fonts used by the FreeType regression test programs.
-# These will be copied to $FREETYPE/tests/data/
-# Each font file contains an 8-hexchar prefix corresponding to its md5sum
-
-set -e
-
-export LANG=C
-export LC_ALL=C
-
-PROGDIR=$(dirname "$0")
-PROGNAME=$(basename "$0")
-
-# Download a file from a given URL
-#
-# $1: URL
-# $2: Destination directory
-# $3: If not empty, destination file name. Default is to take
-# the URL's basename.
-# -download_file () { - local URL=$1 - local DST_DIR=$2 - local DST_FILE=$3 - if [[ -z "$DST_FILE" ]]; then - DST_FILE=$(basename "$URL") - fi - echo "URL: $URL" - wget -q -O "$DST_DIR/$DST_FILE" "$URL" -} - -# $1: URL -# $2: Destination directory -# $3+: Optional file list, otherwise the full archive is extracted to $2 -download_and_extract_zip () { - local URL=$1 - local DST_DIR=$2 - shift - shift - TEMP_DST_DIR=$(mktemp -d) - TEMP_DST_NAME="a.zip" - download_file "$URL" "$TEMP_DST_DIR" "$TEMP_DST_NAME" - unzip -qo "$TEMP_DST_DIR/$TEMP_DST_NAME" -d "$DST_DIR" "$@" - rm -rf "$TEMP_DST_DIR" -} - -# $1: File path -# $2: Expected md5sum -md5sum_check () { - local FILE=$1 - local EXPECTED=$2 - local HASH=$(md5sum "$FILE" | cut -d" " -f1) - if [[ "$EXPECTED" != "$HASH" ]]; then - echo "$FILE: Invalid md5sum $HASH expected $EXPECTED" - return 1 - fi -} - -INSTALL_DIR=$(cd $PROGDIR/.. && pwd)/data - -mkdir -p "$INSTALL_DIR" - -# See https://gitlab.freedesktop.org/freetype/freetype/-/issues/1063 -download_and_extract_zip "https://github.com/python-pillow/Pillow/files/6622147/As.I.Lay.Dying.zip" "$INSTALL_DIR" -mv "$INSTALL_DIR/As I Lay Dying.ttf" "$INSTALL_DIR/As.I.Lay.Dying.ttf" -md5sum_check "$INSTALL_DIR/As.I.Lay.Dying.ttf" e153d60e66199660f7cfe99ef4705ad7