Skip to content

Crypto

provide.foundation.crypto

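Hashing and checksum utilities: hash bytes, strings, streams and files; build, parse and verify algorithm-prefixed checksums (e.g. "sha256:abc123..."); and read or write checksum files.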

Functions

calculate_checksums

calculate_checksums(
    path: Path | str, algorithms: list[str] | None = None
) -> dict[str, str]

Calculate multiple checksums for a file.

Parameters:

Name Type Description Default
path Path | str

File path

required
algorithms list[str] | None

List of algorithms (defaults to sha256 and md5)

None

Returns:

Type Description
dict[str, str]

Dictionary mapping algorithm name to hex digest

Raises:

Type Description
ResourceError

If file cannot be read

ValidationError

If any algorithm is not supported

Source code in provide/foundation/crypto/checksums.py
def calculate_checksums(
    path: Path | str,
    algorithms: list[str] | None = None,
) -> dict[str, str]:
    """Compute several digests of one file in a single call.

    Args:
        path: Location of the file to hash.
        algorithms: Names of the hash algorithms to apply. When ``None``,
            ``["sha256", "md5"]`` is used.

    Returns:
        The result of ``hash_file_multiple`` for the file and algorithms
        (a mapping of algorithm name to hex digest).

    Raises:
        ResourceError: If file cannot be read (propagated from
            ``hash_file_multiple``, which is not shown here).
        ValidationError: If any algorithm is not supported (propagated
            likewise).

    """
    # Imported at call time rather than at module level.
    from provide.foundation.crypto.hashing import hash_file_multiple

    selected = ["sha256", "md5"] if algorithms is None else algorithms
    digests = hash_file_multiple(path, selected)

    log.debug("📝 Calculated checksums", path=str(path), algorithms=selected)
    return digests

compare_hash

compare_hash(hash1: str, hash2: str) -> bool

Compare two hash values in a case-insensitive manner.

Parameters:

Name Type Description Default
hash1 str

First hash value

required
hash2 str

Second hash value

required

Returns:

Type Description
bool

True if hashes match (case-insensitive)

Source code in provide/foundation/crypto/utils.py
def compare_hash(hash1: str, hash2: str) -> bool:
    """Compare two hash values in a case-insensitive, timing-safe manner.

    Both values are lower-cased and then compared with
    ``hmac.compare_digest`` instead of ``==`` so that the comparison time
    does not reveal how much of an expected digest matched.

    Args:
        hash1: First hash value
        hash2: Second hash value

    Returns:
        True if hashes match (case-insensitive)

    """
    import hmac

    # compare_digest only accepts ASCII-only str, so compare UTF-8 bytes;
    # "surrogatepass" keeps lone surrogates from raising while encoding.
    left = hash1.lower().encode("utf-8", "surrogatepass")
    right = hash2.lower().encode("utf-8", "surrogatepass")
    return hmac.compare_digest(left, right)

format_checksum

format_checksum(
    data: bytes, algorithm: str = DEFAULT_ALGORITHM
) -> str

Calculate checksum with algorithm prefix.

Returns checksums in the format "algorithm:hexdigest" (e.g., "sha256:abc123..."). This format enables self-describing checksums that include the algorithm used.

Parameters:

Name Type Description Default
data bytes

Data to checksum

required
algorithm str

Hash algorithm (sha256, sha512, blake2b, blake2s, md5, adler32)

DEFAULT_ALGORITHM

Returns:

Type Description
str

Prefixed checksum string (e.g., "sha256:abc123...")

Raises:

Type Description
ValueError

If algorithm is not supported

Example

>>> data = b"Hello, World!"
>>> format_checksum(data, "sha256")
'sha256:dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f'
>>> format_checksum(data, "adler32")
'adler32:1c49043e'

Source code in provide/foundation/crypto/prefixed.py
def format_checksum(data: bytes, algorithm: str = DEFAULT_ALGORITHM) -> str:
    """Calculate checksum with algorithm prefix.

    Returns checksums in the format "algorithm:hexdigest" (e.g., "sha256:abc123...").
    This format enables self-describing checksums that include the algorithm used.

    The algorithm name is matched case-insensitively and always emitted in
    lower case, so the result can be fed back to ``parse_checksum``, which
    only recognises lower-case prefixes.

    Args:
        data: Data to checksum
        algorithm: Hash algorithm (sha256, sha512, blake2b, blake2s, md5, adler32)

    Returns:
        Prefixed checksum string (e.g., "sha256:abc123...")

    Raises:
        ValidationError: If algorithm is not supported (raised by
            ``validate_algorithm``).

    Example:
        >>> data = b"Hello, World!"
        >>> format_checksum(data, "sha256")
        'sha256:dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f'
        >>> format_checksum(data, "adler32")
        'adler32:1c49043e'

    """
    # Normalise once: validate_algorithm() and get_hasher() are already
    # case-insensitive, so the adler32 check and the prefix should be too.
    algorithm = algorithm.lower()

    if algorithm == "adler32":
        # Special case for adler32 using zlib
        import zlib

        # Mask to 32 bits so the value always fits the 8-hex-digit format.
        checksum = zlib.adler32(data) & 0xFFFFFFFF
        result = f"adler32:{checksum:08x}"
        log.debug(
            "🔐 Calculated adler32 checksum",
            size=len(data),
            checksum=result,
        )
        return result

    # Use standard hashing for other algorithms
    validate_algorithm(algorithm)
    digest = hash_data(data, algorithm)
    result = f"{algorithm}:{digest}"

    log.debug(
        "🔐 Calculated prefixed checksum",
        algorithm=algorithm,
        size=len(data),
        checksum=result[:40] + "...",
    )

    return result

format_hash

format_hash(
    hash_value: str,
    group_size: int = 8,
    groups: int = 0,
    separator: str = " ",
) -> str

Format a hash value for display.

Parameters:

Name Type Description Default
hash_value str

Hash value to format

required
group_size int

Number of characters per group

8
groups int

Number of groups to show (0 for all)

0
separator str

Separator between groups

' '

Returns:

Type Description
str

Formatted hash string

Examples:

>>> format_hash("abc123def456", group_size=4, separator="-")
"abc1-23de-f456"
>>> format_hash("abc123def456", group_size=4, groups=2)
"abc1 23de"
Source code in provide/foundation/crypto/utils.py
def format_hash(
    hash_value: str,
    group_size: int = 8,
    groups: int = 0,
    separator: str = " ",
) -> str:
    """Break a hash string into fixed-width groups for display.

    Args:
        hash_value: Hash value to format
        group_size: Number of characters per group; a value of zero or
            less returns ``hash_value`` unchanged
        groups: Maximum number of groups to emit (zero or less means all)
        separator: Text placed between consecutive groups

    Returns:
        The groups joined by ``separator``

    Examples:
        >>> format_hash("abc123def456", group_size=4, separator="-")
        'abc1-23de-f456'
        >>> format_hash("abc123def456", group_size=4, groups=2)
        'abc1 23de'

    """
    if group_size <= 0:
        return hash_value

    # Stop slicing once the requested number of groups has been covered.
    total = len(hash_value)
    stop = min(total, groups * group_size) if groups > 0 else total

    return separator.join(
        hash_value[start : start + group_size] for start in range(0, stop, group_size)
    )

get_hasher

get_hasher(algorithm: str) -> Any

Get a hash object for the specified algorithm.

Parameters:

Name Type Description Default
algorithm str

Hash algorithm name

required

Returns:

Type Description
Any

Hash object from hashlib

Raises:

Type Description
ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/algorithms.py
def get_hasher(algorithm: str) -> Any:
    """Get a hash object for the specified algorithm.

    Args:
        algorithm: Hash algorithm name (matched case-insensitively)

    Returns:
        Hash object from hashlib

    Raises:
        ValidationError: If algorithm is not supported

    """
    validate_algorithm(algorithm)

    # Every family (sha3_*, blake2*, md5/sha1/sha256/...) is constructed the
    # same way from its lower-case name, so no per-family branching is needed.
    return hashlib.new(algorithm.lower())

hash_data

hash_data(
    data: bytes, algorithm: str = DEFAULT_ALGORITHM
) -> str

Hash binary data.

Parameters:

Name Type Description Default
data bytes

Data to hash

required
algorithm str

Hash algorithm

DEFAULT_ALGORITHM

Returns:

Type Description
str

Hex digest

Raises:

Type Description
ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/hashing.py
def hash_data(
    data: bytes,
    algorithm: str = DEFAULT_ALGORITHM,
) -> str:
    """Return the hex digest of an in-memory byte string.

    Args:
        data: Bytes to feed to the hasher
        algorithm: Name of the hash algorithm to use

    Returns:
        Hex digest of ``data``

    Raises:
        ValidationError: If algorithm is not supported

    """
    validate_algorithm(algorithm)

    digester = get_hasher(algorithm)
    digester.update(data)
    digest = digester.hexdigest()

    # Only a 16-character prefix of the digest is logged.
    log.debug(
        "🔐 Hashed data",
        algorithm=algorithm,
        size=len(data),
        hash=digest[:16] + "...",
    )
    return digest

hash_file

hash_file(
    path: Path | str,
    algorithm: str = DEFAULT_ALGORITHM,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
) -> str

Hash a file's contents.

Parameters:

Name Type Description Default
path Path | str

File path

required
algorithm str

Hash algorithm (sha256, sha512, md5, etc.)

DEFAULT_ALGORITHM
chunk_size int

Size of chunks to read at a time

DEFAULT_CHUNK_SIZE

Returns:

Type Description
str

Hex digest of file hash

Raises:

Type Description
ResourceError

If file cannot be read

ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/hashing.py
def hash_file(
    path: Path | str,
    algorithm: str = DEFAULT_ALGORITHM,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
) -> str:
    """Compute the hex digest of a file, reading it chunk by chunk.

    Args:
        path: File path
        algorithm: Hash algorithm (sha256, sha512, md5, etc.)
        chunk_size: Number of bytes requested per read

    Returns:
        Hex digest of file hash

    Raises:
        ResourceError: If the path does not exist, is not a regular file,
            or an ``OSError`` occurs while reading
        ValidationError: If algorithm is not supported

    """
    if isinstance(path, str):
        path = Path(path)

    # Path checks come before algorithm validation, so a bad path is
    # reported first.
    if not path.exists():
        raise ResourceError(
            f"File not found: {path}",
            resource_type="file",
            resource_path=str(path),
        )
    if not path.is_file():
        raise ResourceError(
            f"Path is not a file: {path}",
            resource_type="file",
            resource_path=str(path),
        )

    validate_algorithm(algorithm)
    digester = get_hasher(algorithm)

    try:
        with path.open("rb") as handle:
            # Keep reading until read() returns the empty-bytes sentinel.
            for block in iter(lambda: handle.read(chunk_size), b""):
                digester.update(block)

        digest = digester.hexdigest()
        log.debug(
            "🔐 Hashed file",
            path=str(path),
            algorithm=algorithm,
            hash=digest[:16] + "...",
        )
        return digest
    except OSError as exc:
        raise ResourceError(
            f"Failed to read file: {path}",
            resource_type="file",
            resource_path=str(path),
        ) from exc

hash_name

hash_name(name: str) -> int

Generate a 64-bit hash of a string for fast lookup.

This is useful for creating numeric identifiers from strings.

Parameters:

Name Type Description Default
name str

String to hash

required

Returns:

Type Description
int

64-bit integer hash

Source code in provide/foundation/crypto/utils.py
def hash_name(name: str) -> int:
    """Derive a 64-bit integer identifier from a string.

    The name is UTF-8 encoded and hashed with SHA-256; the first eight
    digest bytes are interpreted as a little-endian unsigned integer.

    Args:
        name: String to hash

    Returns:
        Integer in the range ``0 <= value < 2**64``

    """
    full_digest = hashlib.sha256(name.encode("utf-8")).digest()
    leading_eight = full_digest[:8]
    return int.from_bytes(leading_eight, "little")

hash_stream

hash_stream(
    stream: BinaryIO,
    algorithm: str = DEFAULT_ALGORITHM,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
) -> str

Hash data from a stream.

Parameters:

Name Type Description Default
stream BinaryIO

Binary stream to read from

required
algorithm str

Hash algorithm

DEFAULT_ALGORITHM
chunk_size int

Size of chunks to read at a time

DEFAULT_CHUNK_SIZE

Returns:

Type Description
str

Hex digest

Raises:

Type Description
ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/hashing.py
def hash_stream(
    stream: BinaryIO,
    algorithm: str = DEFAULT_ALGORITHM,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
) -> str:
    """Compute the hex digest of everything readable from a binary stream.

    The stream is consumed from its current position until ``read``
    returns an empty (falsy) result; it is not closed or rewound.

    Args:
        stream: Binary stream to read from
        algorithm: Hash algorithm
        chunk_size: Number of bytes requested per read

    Returns:
        Hex digest

    Raises:
        ValidationError: If algorithm is not supported

    """
    validate_algorithm(algorithm)
    digester = get_hasher(algorithm)

    total = 0
    while True:
        block = stream.read(chunk_size)
        if not block:
            break
        digester.update(block)
        total += len(block)

    digest = digester.hexdigest()
    log.debug(
        "🔐 Hashed stream",
        algorithm=algorithm,
        bytes_read=total,
        hash=digest[:16] + "...",
    )
    return digest

hash_string

hash_string(
    text: str,
    algorithm: str = DEFAULT_ALGORITHM,
    encoding: str = "utf-8",
) -> str

Hash a text string.

Parameters:

Name Type Description Default
text str

Text to hash

required
algorithm str

Hash algorithm

DEFAULT_ALGORITHM
encoding str

Text encoding

'utf-8'

Returns:

Type Description
str

Hex digest

Raises:

Type Description
ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/hashing.py
def hash_string(
    text: str,
    algorithm: str = DEFAULT_ALGORITHM,
    encoding: str = "utf-8",
) -> str:
    """Encode a string and return the hex digest of the resulting bytes.

    Args:
        text: Text to hash
        algorithm: Hash algorithm
        encoding: Codec used to turn ``text`` into bytes before hashing

    Returns:
        Hex digest

    Raises:
        ValidationError: If algorithm is not supported

    """
    encoded = text.encode(encoding)
    return hash_data(encoded, algorithm)

is_secure_algorithm

is_secure_algorithm(algorithm: str) -> bool

Check if an algorithm is considered cryptographically secure.

Parameters:

Name Type Description Default
algorithm str

Hash algorithm name

required

Returns:

Type Description
bool

True if algorithm is secure, False otherwise

Source code in provide/foundation/crypto/algorithms.py
def is_secure_algorithm(algorithm: str) -> bool:
    """Report whether an algorithm is listed in ``SECURE_ALGORITHMS``.

    Args:
        algorithm: Hash algorithm name (matched case-insensitively)

    Returns:
        True if the lower-cased name is in ``SECURE_ALGORITHMS``, False otherwise

    """
    normalized = algorithm.lower()
    return normalized in SECURE_ALGORITHMS

is_strong_checksum

is_strong_checksum(checksum_str: str) -> bool

Check if a checksum uses a cryptographically strong algorithm.

Strong algorithms are suitable for security-critical applications. Weak algorithms like MD5 and Adler32 should only be used for non-security purposes like data integrity checks.

Parameters:

Name Type Description Default
checksum_str str

Prefixed checksum string

required

Returns:

Type Description
bool

True if using a strong algorithm (sha256, sha512, blake2b, blake2s)

Example

>>> is_strong_checksum("sha256:abc123")
True
>>> is_strong_checksum("md5:abc123")
False
>>> is_strong_checksum("adler32:deadbeef")
False

Source code in provide/foundation/crypto/prefixed.py
def is_strong_checksum(checksum_str: str) -> bool:
    """Tell whether a prefixed checksum names a cryptographically strong algorithm.

    The algorithm is taken from the prefix via ``parse_checksum``. A string
    that cannot be parsed is logged as a warning and treated as not strong.

    Args:
        checksum_str: Prefixed checksum string

    Returns:
        True if the prefix is one of sha256, sha512, blake2b, blake2s;
        False for any other algorithm or for an unparseable string

    Example:
        >>> is_strong_checksum("sha256:abc123")
        True
        >>> is_strong_checksum("md5:abc123")
        False
        >>> is_strong_checksum("adler32:deadbeef")
        False

    """
    try:
        name, _value = parse_checksum(checksum_str)
        strong = name in {"sha256", "sha512", "blake2b", "blake2s"}
        log.debug("🔒 Checked checksum strength", algorithm=name, is_strong=strong)
        return strong
    except ValueError:
        # Only a 40-character preview of the input is logged.
        preview = checksum_str[:40] + "..."
        log.warning(
            "⚠️ Cannot determine checksum strength - invalid format",
            checksum=preview,
        )
        return False

normalize_checksum

normalize_checksum(checksum_str: str) -> str

Normalize a checksum string to prefixed format.

Ensures the checksum is in the standard "algorithm:value" format and validates both the algorithm and value.

Parameters:

Name Type Description Default
checksum_str str

Checksum string to normalize

required

Returns:

Type Description
str

Normalized checksum with prefix

Raises:

Type Description
ValueError

If checksum format is invalid

Example

>>> normalize_checksum("sha256:ABC123")
'sha256:abc123'

Source code in provide/foundation/crypto/prefixed.py
def normalize_checksum(checksum_str: str) -> str:
    """Return a prefixed checksum with its value part lower-cased.

    The input is split by ``parse_checksum``, which checks the format and
    the algorithm prefix; the value part is lower-cased but not otherwise
    inspected here.

    Args:
        checksum_str: Checksum string to normalize

    Returns:
        ``"algorithm:value"`` with ``value`` in lower case

    Raises:
        ValueError: If checksum format is invalid (raised by ``parse_checksum``)

    Example:
        >>> normalize_checksum("sha256:ABC123")
        'sha256:abc123'

    """
    name, raw_value = parse_checksum(checksum_str)
    result = ":".join((name, raw_value.lower()))

    # Both sides are truncated to 40 characters for the log entry.
    log.debug(
        "🔄 Normalized checksum",
        input=checksum_str[:40] + "...",
        output=result[:40] + "...",
    )
    return result

parse_checksum

parse_checksum(checksum_str: str) -> tuple[str, str]

Parse algorithm and value from a prefixed checksum string.

Requires prefixed format ("algorithm:hexvalue"). This enables validation of both the algorithm and the checksum value.

Parameters:

Name Type Description Default
checksum_str str

Prefixed checksum string

required

Returns:

Type Description
tuple[str, str]

Tuple of (algorithm, hex_value)

Raises:

Type Description
ValueError

If checksum format is invalid or algorithm is unsupported

Example

>>> parse_checksum("sha256:abc123")
('sha256', 'abc123')
>>> parse_checksum("invalid")
Traceback (most recent call last):
    ...
ValueError: Checksum must use prefixed format (algorithm:value): invalid

Source code in provide/foundation/crypto/prefixed.py
def parse_checksum(checksum_str: str) -> tuple[str, str]:
    """Parse algorithm and value from a prefixed checksum string.

    Requires prefixed format ("algorithm:hexvalue"). The string is split at
    the first colon. The algorithm prefix is matched case-insensitively and
    returned in lower case; the value is returned as given and is not
    validated here.

    Args:
        checksum_str: Prefixed checksum string

    Returns:
        Tuple of (algorithm, hex_value)

    Raises:
        ValueError: If the string is empty, has no ":" separator, or names
            an unsupported algorithm

    Example:
        >>> parse_checksum("sha256:abc123")
        ('sha256', 'abc123')
        >>> parse_checksum("invalid")
        Traceback (most recent call last):
            ...
        ValueError: Checksum must use prefixed format (algorithm:value): invalid

    """
    if not checksum_str:
        raise ValueError("Empty checksum string")

    # partition() always yields three parts; an empty separator means the
    # string had no colon, so no separate length check is needed.
    prefix, colon, value = checksum_str.partition(":")
    if not colon:
        raise ValueError(f"Checksum must use prefixed format (algorithm:value): {checksum_str}")

    # The hashing helpers accept algorithm names in any case, so accept
    # e.g. "SHA256:..." here too and hand back the canonical lower-case name.
    algorithm = prefix.lower()

    # Validate algorithm
    supported_algorithms = ["sha256", "sha512", "blake2b", "blake2s", "md5", "adler32"]
    if algorithm not in supported_algorithms:
        raise ValueError(
            f"Unknown checksum algorithm: {prefix}. Supported: {', '.join(supported_algorithms)}"
        )

    log.debug(
        "📋 Parsed prefixed checksum",
        algorithm=algorithm,
        value=value[:16] + "...",
    )

    return algorithm, value

parse_checksum_file

parse_checksum_file(
    path: Path | str, algorithm: str | None = None
) -> dict[str, str]

Parse a checksum file and return filename to hash mapping.

Supports common checksum file formats:

- SHA256: "hash  filename" or "hash filename"
- MD5: "hash  filename" or "hash filename"
- SHA256SUMS: "hash  filename"
- MD5SUMS: "hash  filename"

Parameters:

Name Type Description Default
path Path | str

Path to checksum file

required
algorithm str | None

Expected algorithm (for validation)

None

Returns:

Type Description
dict[str, str]

Dictionary mapping filename to hash

Raises:

Type Description
ResourceError

If file cannot be read

Source code in provide/foundation/crypto/checksums.py
def parse_checksum_file(
    path: Path | str,
    algorithm: str | None = None,
) -> dict[str, str]:
    """Parse a checksum file and return filename to hash mapping.

    Supports common checksum file formats:
    - SHA256: "hash  filename" or "hash filename"
    - MD5: "hash  filename" or "hash filename"
    - SHA256SUMS: "hash  filename"
    - MD5SUMS: "hash  filename"

    Blank lines and lines starting with "#" are skipped, as are lines that
    do not split into a hash and a filename. A leading "*" (binary mode
    indicator) is removed from the filename, and hashes are lower-cased.

    Args:
        path: Path to checksum file
        algorithm: Expected algorithm. Currently only included in the debug
            log entry; hash values are not checked against it.

    Returns:
        Dictionary mapping filename to hash

    Raises:
        ResourceError: If the file does not exist or an ``OSError`` occurs
            while reading it

    """
    if isinstance(path, str):
        path = Path(path)

    if not path.exists():
        raise ResourceError(
            f"Checksum file not found: {path}",
            resource_type="file",
            resource_path=str(path),
        )

    checksums = {}

    try:
        from provide.foundation.file.safe import safe_read_text

        content = safe_read_text(path, default="", encoding="utf-8")

        for line in content.splitlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            # Split on the first whitespace run (handles single and double space)
            parts = line.split(None, 1)
            if len(parts) == 2:
                hash_value, filename = parts
                # Remove any leading asterisk (binary mode indicator)
                filename = filename.removeprefix("*")
                checksums[filename] = hash_value.lower()

        # The event message was missing from this call; every other log call
        # in the module passes one as the first argument.
        log.debug(
            "📄 Parsed checksum file",
            path=str(path),
            entries=len(checksums),
            algorithm=algorithm,
        )

        return checksums

    except OSError as e:
        raise ResourceError(
            f"Failed to read checksum file: {path}",
            resource_type="file",
            resource_path=str(path),
        ) from e

quick_hash

quick_hash(data: bytes) -> int

Generate a quick non-cryptographic hash for lookups.

This uses Python's built-in hash function which is fast but not cryptographically secure. Use only for hash tables and caching.

Parameters:

Name Type Description Default
data bytes

Data to hash

required

Returns:

Type Description
int

32-bit hash value

Source code in provide/foundation/crypto/utils.py
def quick_hash(data: bytes) -> int:
    """Return a fast, non-cryptographic 32-bit hash of ``data``.

    The value is Python's built-in ``hash()`` masked to the low 32 bits.
    It is not cryptographically secure; use it only for hash tables and
    caching. NOTE: CPython salts ``hash()`` of bytes per process unless
    ``PYTHONHASHSEED`` is fixed, so values should not be persisted or
    compared across runs.

    Args:
        data: Data to hash

    Returns:
        Integer in the range ``0 <= value <= 0xFFFFFFFF``

    """
    low_32_bits = 0xFFFFFFFF
    return hash(data) & low_32_bits

validate_algorithm

validate_algorithm(algorithm: str) -> None

Validate that a hash algorithm is supported.

Parameters:

Name Type Description Default
algorithm str

Hash algorithm name

required

Raises:

Type Description
ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/algorithms.py
def validate_algorithm(algorithm: str) -> None:
    """Raise unless the algorithm name is in ``SUPPORTED_ALGORITHMS``.

    The name is lower-cased before the membership test, so matching is
    case-insensitive.

    Args:
        algorithm: Hash algorithm name

    Raises:
        ValidationError: If algorithm is not supported

    """
    if algorithm.lower() in SUPPORTED_ALGORITHMS:
        return

    raise ValidationError(
        f"Unsupported hash algorithm: {algorithm}",
        field="algorithm",
        value=algorithm,
        # The rule text lists the accepted names in sorted order.
        rule="must be one of: " + ", ".join(sorted(SUPPORTED_ALGORITHMS)),
    )

verify_checksum

verify_checksum(data: bytes, checksum_str: str) -> bool

Verify data against a prefixed checksum string.

Automatically extracts the algorithm from the checksum string and performs verification using the appropriate algorithm.

Parameters:

Name Type Description Default
data bytes

Data to verify

required
checksum_str str

Expected prefixed checksum (e.g., "sha256:abc123...")

required

Returns:

Type Description
bool

True if checksum matches, False otherwise

Example

>>> data = b"test data"
>>> checksum = format_checksum(data, "sha256")
>>> verify_checksum(data, checksum)
True
>>> verify_checksum(b"wrong data", checksum)
False

Source code in provide/foundation/crypto/prefixed.py
def verify_checksum(data: bytes, checksum_str: str) -> bool:
    """Verify data against a prefixed checksum string.

    Automatically extracts the algorithm from the checksum string and
    performs verification using the appropriate algorithm. The comparison
    of the hex values is case-insensitive.

    This function is best-effort: any exception raised while parsing or
    hashing is logged as a warning and reported as a failed verification
    rather than propagated.

    Args:
        data: Data to verify
        checksum_str: Expected prefixed checksum (e.g., "sha256:abc123...")

    Returns:
        True if checksum matches, False otherwise (including when the
        checksum string cannot be parsed)

    Example:
        >>> data = b"test data"
        >>> checksum = format_checksum(data, "sha256")
        >>> verify_checksum(data, checksum)
        True
        >>> verify_checksum(b"wrong data", checksum)
        False

    """
    try:
        algorithm, expected_value = parse_checksum(checksum_str)
        actual_checksum = format_checksum(data, algorithm)
        # Drop the "algorithm:" prefix to compare the hex values only.
        actual_value = actual_checksum.split(":", 1)[1]

        matches = actual_value.lower() == expected_value.lower()

        if matches:
            # The event message was missing here. If the logger requires one,
            # the resulting error would be swallowed by the handler below and
            # a matching checksum would be reported as a failure.
            log.debug(
                "✅ Prefixed checksum verified",
                algorithm=algorithm,
                size=len(data),
            )
        else:
            log.warning(
                "❌ Prefixed checksum mismatch",
                algorithm=algorithm,
                expected=expected_value[:16] + "...",
                actual=actual_value[:16] + "...",
            )

        return matches

    except Exception as e:  # deliberate catch-all; ValueError is already covered
        log.warning(
            "❌ Checksum verification failed",
            error=str(e),
            checksum=checksum_str[:40] + "...",
        )
        return False

verify_data

verify_data(
    data: bytes,
    expected_hash: str,
    algorithm: str = DEFAULT_ALGORITHM,
) -> bool

Verify data matches an expected hash.

Parameters:

Name Type Description Default
data bytes

Data to verify

required
expected_hash str

Expected hash value

required
algorithm str

Hash algorithm

DEFAULT_ALGORITHM

Returns:

Type Description
bool

True if hash matches, False otherwise

Raises:

Type Description
ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/checksums.py
def verify_data(
    data: bytes,
    expected_hash: str,
    algorithm: str = DEFAULT_ALGORITHM,
) -> bool:
    """Verify data matches an expected hash.

    The data is hashed with ``hash_data`` and the result compared to
    ``expected_hash`` with ``compare_hash`` (case-insensitive).

    Args:
        data: Data to verify
        expected_hash: Expected hash value
        algorithm: Hash algorithm

    Returns:
        True if hash matches, False otherwise

    Raises:
        ValidationError: If algorithm is not supported

    """
    actual_hash = hash_data(data, algorithm)
    matches = compare_hash(actual_hash, expected_hash)

    if matches:
        # The event message was missing from this call; every other log call
        # in the module passes one as the first argument.
        log.debug(
            "✅ Data checksum verified",
            algorithm=algorithm,
            size=len(data),
        )
    else:
        log.warning(
            "❌ Data checksum mismatch",
            algorithm=algorithm,
            expected=expected_hash[:16] + "...",
            actual=actual_hash[:16] + "...",
        )

    return matches

verify_file

verify_file(
    path: Path | str,
    expected_hash: str,
    algorithm: str = DEFAULT_ALGORITHM,
) -> bool

Verify a file matches an expected hash.

Parameters:

Name Type Description Default
path Path | str

File path

required
expected_hash str

Expected hash value

required
algorithm str

Hash algorithm

DEFAULT_ALGORITHM

Returns:

Type Description
bool

True if hash matches, False otherwise

Raises:

Type Description
ResourceError

If file cannot be read

ValidationError

If algorithm is not supported

Source code in provide/foundation/crypto/checksums.py
def verify_file(
    path: Path | str,
    expected_hash: str,
    algorithm: str = DEFAULT_ALGORITHM,
) -> bool:
    """Verify a file matches an expected hash.

    The file is hashed with ``hash_file`` and the result compared to
    ``expected_hash`` with ``compare_hash`` (case-insensitive).

    Args:
        path: File path
        expected_hash: Expected hash value
        algorithm: Hash algorithm

    Returns:
        True if hash matches, False otherwise. A ``ResourceError`` from
        ``hash_file`` (file missing, not a regular file, or unreadable) is
        logged and also reported as False rather than raised.

    Raises:
        ValidationError: If algorithm is not supported

    """
    if isinstance(path, str):
        path = Path(path)

    try:
        actual_hash = hash_file(path, algorithm)
        matches = compare_hash(actual_hash, expected_hash)

        if matches:
            # The event message was missing from this call; every other log
            # call in the module passes one as the first argument.
            log.debug(
                "✅ Checksum verified",
                path=str(path),
                algorithm=algorithm,
            )
        else:
            log.warning(
                "❌ Checksum mismatch",
                path=str(path),
                algorithm=algorithm,
                expected=expected_hash[:16] + "...",
                actual=actual_hash[:16] + "...",
            )

        return matches

    except ResourceError as e:
        # hash_file raises ResourceError for more than a missing file, so the
        # message is kept generic and the underlying error is attached.
        log.error(
            "❌ Failed to verify checksum - file could not be read",
            path=str(path),
            error=str(e),
        )
        return False

write_checksum_file

write_checksum_file(
    checksums: dict[str, str],
    path: Path | str,
    algorithm: str = DEFAULT_ALGORITHM,
    binary_mode: bool = True,
) -> None

Write checksums to a file in standard format.

Parameters:

Name Type Description Default
checksums dict[str, str]

Dictionary mapping filename to hash

required
path Path | str

Path to write checksum file

required
algorithm str

Algorithm name (for comments)

DEFAULT_ALGORITHM
binary_mode bool

Whether to use binary mode indicator (*)

True

Raises:

Type Description
ResourceError

If file cannot be written

Source code in provide/foundation/crypto/checksums.py
def write_checksum_file(
    checksums: dict[str, str],
    path: Path | str,
    algorithm: str = DEFAULT_ALGORITHM,
    binary_mode: bool = True,
) -> None:
    """Write a filename-to-hash mapping as a checksum file.

    The file starts with two "#" comment lines and a blank line, followed
    by one "hash  filename" line per entry, sorted by filename. The text is
    written through ``atomic_write_text``.

    Args:
        checksums: Dictionary mapping filename to hash
        path: Path to write checksum file
        algorithm: Algorithm name, used upper-cased in the header comment
        binary_mode: Whether to prefix each filename with "*"

    Raises:
        ResourceError: If an ``OSError`` occurs while writing

    """
    if isinstance(path, str):
        path = Path(path)

    try:
        from provide.foundation.file.atomic import atomic_write_text

        marker = "*" if binary_mode else ""

        header = [
            f"# {algorithm.upper()} checksums",
            "# Generated by provide.foundation",
            "",
        ]
        entries = [
            f"{digest}  {marker}{name}" for name, digest in sorted(checksums.items())
        ]

        text = "\n".join(header + entries) + "\n"
        atomic_write_text(path, text, encoding="utf-8")

        log.debug(
            "📝 Wrote checksum file",
            path=str(path),
            entries=len(checksums),
            algorithm=algorithm,
        )
    except OSError as exc:
        raise ResourceError(
            f"Failed to write checksum file: {path}",
            resource_type="file",
            resource_path=str(path),
        ) from exc