1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
import os
import sys
import shutil
from hashlib import md5
from logging import Logger, getLogger
# Module-level logger used by the helper functions in this module.
log: Logger = getLogger(__name__)
# TODO: add file exclusion option
def get_file_list(path: str,
                  exts: tuple[str, ...],
                  exclude_dirs: "list[str] | None" = None) -> list[str]:
    """Return the files below *path* whose names end with one of *exts*.

    Args:
        path: Directory to walk recursively.
        exts: File name suffixes to keep (a single ``str`` or any iterable
            of ``str``; non-tuple iterables are converted for
            ``str.endswith``).
        exclude_dirs: Directory *names* (not paths) that must not be
            descended into, at any depth. ``None`` means no exclusions.

    Returns:
        File paths relative to *path*, in ``os.walk`` order.
    """
    excluded: list[str] = [] if exclude_dirs is None else exclude_dirs
    # str.endswith only accepts a str or a tuple of str.
    suffixes = exts if isinstance(exts, (str, tuple)) else tuple(exts)
    log.debug('retrieving file list in path "%s" that contain file'
              ' extensions %s except directories %s', path, exts, excluded)

    file_list: list[str] = []
    for root, dirs, files in os.walk(path):
        if excluded:
            log.debug('removing excludes from list')
            # Must be assigned in place: os.walk only skips directories
            # that are removed from the very list object it handed out.
            dirs[:] = [d for d in dirs if d not in excluded]

        # relpath (instead of str.replace + [1:]) stays correct when *path*
        # has a trailing separator, is '.', or reappears deeper in the tree.
        rel_root: str = os.path.relpath(root, path)
        for file in files:
            if file.endswith(suffixes):
                if rel_root == os.curdir:
                    file_name: str = file
                else:
                    file_name = os.path.join(rel_root, file)
                file_list.append(file_name)
                log.debug('added file "%s" without "%s" part: "%s"',
                          file, path, file_name)
            else:
                log.debug('ignoring file "%s" as it doesn\'t contain'
                          ' any of the extensions %s', file, exts)

    return file_list
def get_dir_structure(path: str,
                      exclude: "list[str] | None" = None) -> list[str]:
    """Return the leaf directories below *path*, relative to *path*.

    Only the deepest directories are listed: a directory is dropped from
    the result as soon as one of its (non-excluded) sub-directories is
    found.

    Args:
        path: Directory to walk recursively.
        exclude: Directory *names* (not paths) that must not be listed or
            descended into, at any depth. ``None`` means no exclusions.

    Returns:
        Leaf directory paths relative to *path*, in discovery order.
    """
    excluded: list[str] = [] if exclude is None else exclude
    log.debug('retrieving dir structure in path "%s" except directories (%s)',
              path, ', '.join(excluded))

    # A dict is used as an insertion-ordered set: O(1) membership/removal
    # while preserving the order in which directories were discovered.
    leaf_dirs: dict[str, None] = {}
    for root, dirs, _files in os.walk(path):
        if excluded:
            log.debug('removing excludes from list')
            # In place, so os.walk does not descend into excluded dirs.
            dirs[:] = [d for d in dirs if d not in excluded]

        # root has children, so it is no longer a leaf.
        if dirs and root in leaf_dirs:
            del leaf_dirs[root]
            log.debug('removed dir "%s" as it already is in the list', root)

        for d in dirs:
            # Keep the full path while walking; comparisons against 'root'
            # above rely on it.
            joined_dir: str = os.path.join(root, d)
            leaf_dirs[joined_dir] = None
            log.debug('added dir "%s" to the list', joined_dir)

    log.debug('removing "%s" from all dirs in list', path)
    # relpath (instead of str.replace + [1:]) stays correct when *path* has
    # a trailing separator, is '.', or reappears deeper in the tree.
    return [os.path.relpath(d, path) for d in leaf_dirs]
def create_dir(path: str, p: bool = False, silent: bool = False) -> None:
    """Create the directory *path*, tolerating an already existing one.

    Args:
        path: Directory to create.
        p: When true, also create missing parent directories
            (``os.makedirs``); otherwise only the last component
            (``os.mkdir``).
        silent: When true, suppress the info-level log messages.

    Raises:
        FileExistsError: If *path* exists but is not a directory.
        OSError: Any other error from ``os.mkdir`` / ``os.makedirs``
            (e.g. missing parent when *p* is false).
    """
    try:
        if p:
            os.makedirs(path)
        else:
            os.mkdir(path)
    except FileExistsError:
        # FileExistsError is also raised when a regular file occupies the
        # path; that is a real failure, not "directory already exists".
        if not os.path.isdir(path):
            raise
        if not silent:
            log.info('directory "%s" already exists, ignoring', path)
    else:
        if not silent:
            log.info('created directory "%s"', path)
def copy_file(src: str, dst: str) -> None:
    """Copy *src* to *dst* unless something already exists at *dst*.

    The copy is done with ``shutil.copy2``. An existing *dst* is never
    overwritten; the call is logged and skipped instead.
    """
    if os.path.exists(dst):
        log.info('file "%s" already exists, ignoring', dst)
        return

    shutil.copy2(src, dst)
    log.info('copied file "%s" to "%s"', src, dst)
# only used for database, but keeping it here as it is an independent function
# as seen in SO: https://stackoverflow.com/a/1131238
def get_checksum(path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and hashed in 4096-byte reads, so it
    is never loaded into memory as a whole.
    """
    log.debug('calculating md5 checksum for "%s"', path)
    digest = md5()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (EOF)
        for block in iter(lambda: stream.read(4096), b''):
            digest.update(block)
    return digest.hexdigest()
def get_expanded_path(path: str) -> str:
    """Expand environment variables in *path* and normalize the result.

    If a ``$`` is still present after expansion, an error is logged and the
    process exits with status 1 via ``sys.exit``.

    Returns:
        The expanded, ``os.path.normpath``-normalized path.
    """
    log.debug('expanding path "%s"', path)
    with_vars: str = os.path.expandvars(path)
    expanded_path: str = os.path.normpath(with_vars)

    if '$' not in expanded_path:
        log.debug('expanded path "%s" to "%s"', path, expanded_path)
        return expanded_path

    # expandvars leaves unknown variables untouched, so a leftover '$'
    # signals a reference that could not be resolved
    log.error('"$" character found in expanded path "%s";'
              ' could be due to non-existant env var', expanded_path)
    sys.exit(1)
|