Skip to content

Hashing

Functions for generating hashes.


b64_decoded_str

b64_decoded_str(encoded_str)

Decodes a base64-encoded string.

Parameters:

Name Type Description Default
encoded_str str

A string that has been previously encoded with base64

required

Returns:

Type Description
str

the decoded string

Source code in src/aibs_informatics_core/utils/hashing.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def b64_decoded_str(encoded_str: str) -> str:
    """Decodes a base64-encoded string.

    Args:
        encoded_str (str): A string that has been previously encoded with base64

    Returns:
        the decoded string

    Raises:
        Exception: any error raised while decoding (for example malformed
            base64 input, or decoded bytes that cannot be decoded to text)
            is logged once and then re-raised unchanged.
    """
    try:
        return standard_b64decode(encoded_str.encode()).decode()
    except Exception as e:
        # Log the failure once; a preceding logger.error(e) call would only
        # duplicate the same message in the log output.
        logger.exception(e)
        # Bare raise re-raises the active exception without adding this
        # frame to the traceback a second time.
        raise

b64_encoded_str

b64_encoded_str(decoded_str)

Encodes a string with base 64.

Parameters:

Name Type Description Default
decoded_str str

Any string

required

Returns:

Type Description
str

an encoded base 64 string

Source code in src/aibs_informatics_core/utils/hashing.py
80
81
82
83
84
85
86
87
88
89
def b64_encoded_str(decoded_str: str) -> str:
    """Encodes a string with standard base64.

    Args:
        decoded_str (str): Any string

    Returns:
        the base64 encoding of ``decoded_str``, as a string
    """
    raw_bytes = decoded_str.encode()
    encoded_bytes = standard_b64encode(raw_bytes)
    return encoded_bytes.decode()

generate_file_hash

generate_file_hash(
    filename, bufsize=128 * 1024, hash_type="sha256"
)

Generate a hash for a file

https://stackoverflow.com/a/70215084/4544508

Parameters:

Name Type Description Default
filename str | Path

filepath to hash

required
bufsize int

buffer size. Defaults to 128*1024.

128 * 1024
hash_type Literal['md5', 'sha256']

type of hash to generate. Defaults to "sha256".

'sha256'

Returns:

Type Description
str

hash value of file

Source code in src/aibs_informatics_core/utils/hashing.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
def generate_file_hash(
    filename: str | Path, bufsize: int = 128 * 1024, hash_type: HashTypeStr = "sha256"
) -> str:
    """Generate a hash for a file

    The file is read in chunks of at most ``bufsize`` bytes into one reusable
    buffer, so the whole file is never held in memory at once.

    https://stackoverflow.com/a/70215084/4544508

    Args:
        filename (str|Path): filepath to hash
        bufsize (int, optional): buffer size in bytes; must be positive.
            Defaults to 128*1024.
        hash_type (Literal["md5", "sha256"], optional): type of hash to generate.
            Defaults to "sha256".

    Returns:
        hash value of file (hex digest)

    Raises:
        ValueError: if ``bufsize`` is not positive.
    """
    # A zero-length buffer makes readinto() return 0 immediately, which would
    # silently produce the digest of empty input instead of the file's digest.
    if bufsize <= 0:
        raise ValueError(f"bufsize must be a positive integer, got {bufsize}")

    digest = hashlib.new(hash_type)
    # One preallocated buffer, exposed through a memoryview so that slicing
    # the filled portion does not copy the bytes.
    chunk = memoryview(bytearray(bufsize))
    with open(filename, "rb", buffering=0) as stream:
        # readinto() returns the number of bytes read; a falsy result ends the loop.
        while n_read := stream.readinto(chunk):  # type: ignore
            digest.update(chunk[:n_read])
    return digest.hexdigest()

generate_path_hash

generate_path_hash(
    path, includes=None, excludes=None, hash_type="sha256"
)

Generate a hash based on files found under a given path.

Parameters:

Name Type Description Default
path str | Path

path to compute a hash

required
includes List[str]

list of regex patterns to include. Defaults to None.

None
excludes List[str]

list of regex patterns to exclude. Defaults to None.

None
hash_type Literal['md5', 'sha256']

type of hash to generate. Defaults to "sha256".

'sha256'

Returns:

Type Description
str

hash value

Source code in src/aibs_informatics_core/utils/hashing.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def generate_path_hash(
    path: str | Path,
    includes: list[str] | None = None,
    excludes: list[str] | None = None,
    hash_type: HashTypeStr = "sha256",
) -> str:
    """Generate a hash based on files found under a given path.

    Candidate files come from ``find_all_paths(path, include_dirs=False)``.
    A candidate is dropped if it fully matches any exclude pattern; otherwise
    it is kept if it fully matches at least one include pattern. The result
    is a hash over the per-file hex digests of the kept files, fed in the
    order the candidates were returned. Only file hashes are fed into the
    result, not the file paths themselves.

    Args:
        path (str): path to compute a hash
        includes (List[str], optional): list of regex patterns to include.
            Defaults to None, which matches every path.
        excludes (List[str], optional): list of regex patterns to exclude.
            Defaults to None, which excludes nothing.
        hash_type (Literal["md5", "sha256"], optional): type of hash to generate.
            Defaults to "sha256".

    Returns:
        hash value
    """
    candidates = find_all_paths(path, include_dirs=False)
    include_regexes = [re.compile(pattern) for pattern in includes or [r".*"]]
    exclude_regexes = [re.compile(pattern) for pattern in excludes or []]

    def _is_selected(candidate: str) -> bool:
        # Exclusion takes precedence over inclusion.
        if any(regex.fullmatch(candidate) for regex in exclude_regexes):
            return False
        return any(regex.fullmatch(candidate) for regex in include_regexes)

    selected = [candidate for candidate in candidates if _is_selected(candidate)]

    combined = hashlib.new(hash_type)
    for selected_path in selected:
        file_digest = generate_file_hash(selected_path, hash_type=hash_type)
        combined.update(file_digest.encode("utf-8"))
    return combined.hexdigest()

sha256_hexdigest

sha256_hexdigest(content=None)

Create a SHA 256 Hex Digest string from optional content.

If content is not provided, a unique hex digest is generated from a UUID.

Parameters:

Name Type Description Default
content JSON

Input to base hexdigest off of. Defaults to None.

None

Returns:

Type Description
str

a SHA 256 hex digest string.

Source code in src/aibs_informatics_core/utils/hashing.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def sha256_hexdigest(content: JSON | None = None) -> str:
    """Create a SHA 256 Hex Digest string from optional content.

    Strings are hashed as-is. Any other non-None content is first serialized
    with ``json.dumps(..., sort_keys=True)``, so the key order of mappings
    does not affect the digest. If content is not provided, the digest is
    computed from a freshly generated UUID string.

    Args:
        content (JSON, optional): Input to base hexdigest off of. Defaults to None.

    Returns:
        a SHA 256 hex digest string.
    """
    if content is None:
        payload = uuid_str()
    elif isinstance(content, str):
        payload = content
    else:
        payload = json.dumps(content, sort_keys=True)
    hasher = hashlib.sha256(payload.encode())
    return hasher.hexdigest()

urlsafe_b64_decoded_str

urlsafe_b64_decoded_str(encoded_str)

Decodes a string that was encoded with the URL-safe variant of base64.

Parameters:

Name Type Description Default
encoded_str str

A string that has been previously encoded with URL-safe base64

required

Returns:

Type Description
str

the decoded string

Source code in src/aibs_informatics_core/utils/hashing.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def urlsafe_b64_decoded_str(encoded_str: str) -> str:
    """Decodes a string that was encoded with the URL-safe variant of base64.

    Args:
        encoded_str (str): A string that has been previously encoded with
            URL-safe base64

    Returns:
        the decoded string
    """
    encoded_bytes = encoded_str.encode()
    decoded_bytes = urlsafe_b64decode(encoded_bytes)
    return decoded_bytes.decode()

urlsafe_b64_encoded_str

urlsafe_b64_encoded_str(decoded_str)

Encodes a string with a URL SAFE version of base 64.

Parameters:

Name Type Description Default
decoded_str str

Any string

required

Returns:

Type Description
str

a URL-safe base64-encoded string

Source code in src/aibs_informatics_core/utils/hashing.py
104
105
106
107
108
109
110
111
112
113
def urlsafe_b64_encoded_str(decoded_str: str) -> str:
    """Encodes a string with a URL SAFE version of base 64.

    Args:
        decoded_str (str): Any string

    Returns:
        the URL-safe base64 encoding of ``decoded_str``, as a string
    """
    raw_bytes = decoded_str.encode()
    encoded_bytes = urlsafe_b64encode(raw_bytes)
    return encoded_bytes.decode()

uuid_str

uuid_str(content=None)

Get a UUID String, with option for using a seed to ensure determinism.

Parameters:

Name Type Description Default
content str

A seed to use for determining UUID. Defaults to None.

None

Returns:

Type Description
str

UUID appropriate string

Source code in src/aibs_informatics_core/utils/hashing.py
30
31
32
33
34
35
36
37
38
39
40
41
def uuid_str(content: str | None = None) -> str:
    """Get a UUID String, with option for using a seed to ensure determinism.

    Args:
        content (str, optional): A seed to use for determining UUID. Defaults to None.

    Returns:
        UUID appropriate string
    """
    if content is None:
        return str(uuid.uuid4())
    return str(uuid.uuid3(namespace=uuid.NAMESPACE_DNS, name=content))