summaryrefslogtreecommitdiff
path: root/src/pyssg/utils.py
blob: e63ee08ce91c1f4741e57e83ef92334934ff44bd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import sys
import shutil
from hashlib import md5
from logging import Logger, getLogger

# module-level logger named after this module; every helper below logs through it
log: Logger = getLogger(__name__)


# TODO: add file exclusion option
def get_file_list(path: str,
                  exts: tuple[str, ...],
                  exclude_dirs: list[str] = []) -> list[str]:
    """Return the files under *path* whose names end with one of *exts*.

    The tree is walked with ``os.walk``; directories whose base name appears
    in *exclude_dirs* are pruned and never descended into.

    Args:
        path: root directory to search.
        exts: file name suffixes to accept (passed to ``str.endswith``, so it
            has to be a tuple).
        exclude_dirs: directory base names to skip at any depth. The default
            list is only read, never mutated.

    Returns:
        The matching file paths, relative to *path*.
    """
    # joined once, it is reused by every 'ignoring file' debug message
    exts_str: str = ', '.join(exts)
    log.debug('retrieving file list in path "%s" that contain file'
              ' extensions (%s) except directories (%s)',
              path, exts_str,
              ', '.join(exclude_dirs))
    file_list: list[str] = []
    for root, dirs, files in os.walk(path):
        if exclude_dirs:
            log.debug('removing excludes from list')
            # in-place assignment so os.walk doesn't descend into the excludes
            dirs[:] = [d for d in dirs if d not in exclude_dirs]
        for file in files:
            if file.endswith(exts):
                # relpath only strips the leading 'path' part; a plain
                # str.replace(path, '')[1:] would also eat later occurrences
                # of 'path' and breaks when 'path' ends with a separator
                file_name: str = os.path.relpath(os.path.join(root, file), path)
                file_list.append(file_name)
                log.debug('added file "%s" without "%s" part: "%s"',
                          file, path, file_name)
            else:
                log.debug('ignoring file "%s" as it doesn\'t contain'
                          ' any of the extensions (%s)', file, exts_str)
    return file_list


def get_dir_structure(path: str,
                      exclude: list[str] = []) -> list[str]:
    """Return the deepest directories found under *path*.

    Only directories without (non-excluded) subdirectories are kept: as soon
    as a directory is found to have children, it is dropped from the result in
    favor of those children.

    Args:
        path: root directory to inspect.
        exclude: directory base names to skip at any depth. The default list
            is only read, never mutated.

    Returns:
        The directory paths, relative to *path*.
    """
    log.debug('retrieving dir structure in path "%s" except directories (%s)',
              path, ', '.join(exclude))
    dir_list: list[str] = []
    for root, dirs, _files in os.walk(path):
        if exclude:
            log.debug('removing excludes from list')
            # in-place assignment so os.walk doesn't descend into the excludes
            dirs[:] = [d for d in dirs if d not in exclude]
        # 'root' has children, so it is replaced by them; it can only be in
        # the list once, so checking a single time per 'root' is enough
        if dirs and root in dir_list:
            dir_list.remove(root)
            log.debug('removed dir "%s" as it already is in the list', root)
        for d in dirs:
            # not removing the 'path' part here, as comparisons with 'root' would fail
            joined_dir: str = os.path.join(root, d)
            dir_list.append(joined_dir)
            log.debug('added dir "%s" to the list', joined_dir)
    log.debug('removing "%s" from all dirs in list', path)
    # relpath only strips the leading 'path' part; a plain
    # str.replace(path, '')[1:] would also eat later occurrences of 'path'
    # and breaks when 'path' ends with a separator
    return [os.path.relpath(d, path) for d in dir_list]


def create_dir(path: str, p: bool = False, silent=False) -> None:
    """Create the directory *path*, tolerating that it already exists.

    Args:
        path: directory to create.
        p: when true, missing parent directories are created as well
            (``os.makedirs``); otherwise only the last component is created
            (``os.mkdir``).
        silent: when true, no info message is logged.
    """
    make_dir = os.makedirs if p else os.mkdir
    try:
        make_dir(path)
    except FileExistsError:
        # an existing path is not treated as an error
        if not silent:
            log.info('directory "%s" already exists, ignoring', path)
    else:
        if not silent:
            log.info('created directory "%s"', path)


def copy_file(src: str, dst: str) -> None:
    """Copy *src* to *dst* with ``shutil.copy2`` unless *dst* already exists.

    Args:
        src: file to copy.
        dst: destination path; when something already exists there, nothing
            is copied and only an info message is logged.
    """
    if os.path.exists(dst):
        log.info('file "%s" already exists, ignoring', dst)
        return
    shutil.copy2(src, dst)
    log.info('copied file "%s" to "%s"', src, dst)


# only used for database, but keeping it here as it is an independent function
# as seen in SO: https://stackoverflow.com/a/1131238
def get_checksum(path: str) -> str:
    """Return the hexadecimal md5 digest of the file at *path*.

    The file is opened in binary mode and hashed in 4096 byte chunks, so it
    is never loaded into memory as a whole.
    """
    log.debug('calculating md5 checksum for "%s"', path)
    digest = md5()
    with open(path, "rb") as stream:
        # two-argument iter(): keep reading until an empty read marks the end
        for piece in iter(lambda: stream.read(4096), b""):
            digest.update(piece)
    return digest.hexdigest()


def get_expanded_path(path: str) -> str:
    """Expand the environment variables in *path* and normalize the result.

    Args:
        path: path that may contain environment variable references.

    Returns:
        The path after ``os.path.expandvars`` and ``os.path.normpath``.

    Raises:
        SystemExit: with exit code 1, after logging an error, when a '$' is
            still present in the expanded path.
    """
    log.debug('expanding path "%s"', path)
    with_vars: str = os.path.expandvars(path)
    expanded_path: str = os.path.normpath(with_vars)
    if '$' not in expanded_path:
        log.debug('expanded path "%s" to "%s"', path, expanded_path)
        return expanded_path
    # expandvars leaves unknown variables untouched, so a leftover '$' is
    # taken as a sign that something couldn't be expanded
    log.error('"$" character found in expanded path "%s";'
              ' could be due to non-existant env var', expanded_path)
    sys.exit(1)