Source code for dl_utils.fs.list_files
# -*- coding: utf-8 -*-
# @Time : 5/23/25
# @Author : Yaojie Shen
# @Project : Deep-Learning-Utils
# @File : list_files.py
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Union, List
[docs]
def list_files(path: str, depth: Union[int, None] = None) -> List[str]:
"""
List all files in a folder recursively.
Args:
path: Root path to start the search.
depth: Maximum depth to search.
If None, there is no depth limit.
If 0 or less, stop searching deeper.
Returns: A List of file paths found under the given path.
"""
if os.path.isdir(path):
if depth is not None and depth <= 0:
return []
next_depth = depth - 1 if depth is not None else None
files = []
for entry in os.listdir(path):
full_path = os.path.join(path, entry)
files.extend(list_files(full_path, depth=next_depth))
return files
else:
return [path]
[docs]
def list_files_multithread(directory, n_jobs=16, depth: Union[int, None] = None):
"""
List all files in a directory recursively using multiple threads.
Useful for list files on NFS.
Args:
directory: The directory to search.
n_jobs: Number of parallel jobs (threads) to use.
depth: Maximum recursion depth. If None, no depth limit.
Returns: List of all file paths found under the directory.
"""
entries = os.listdir(directory)
next_depth = depth - 1 if depth is not None else None
results = []
with ThreadPoolExecutor(max_workers=n_jobs) as executor:
futures = {
executor.submit(list_files, os.path.join(directory, entry), depth=next_depth): entry
for entry in entries
}
for future in as_completed(futures):
results.extend(future.result())
return results
__all__ = [
"list_files",
"list_files_multithread",
]