""":module: watchdog.utils.dirsnapshot :synopsis: Directory snapshots and comparison. :author: yesudeep@google.com (Yesudeep Mangalapilly) :author: contact@tiger-222.fr (Mickaƫl Schoentgen) .. ADMONITION:: Where are the moved events? They "disappeared" This implementation does not take partition boundaries into consideration. It will only work when the directory tree is entirely on the same file system. More specifically, any part of the code that depends on inode numbers can break if partition boundaries are crossed. In these cases, the snapshot diff will represent file/directory movement as created and deleted events. Classes ------- .. autoclass:: DirectorySnapshot :members: :show-inheritance: .. autoclass:: DirectorySnapshotDiff :members: :show-inheritance: .. autoclass:: EmptyDirectorySnapshot :members: :show-inheritance: """ from __future__ import annotations import contextlib import errno import os from stat import S_ISDIR from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator from typing import Any, Callable class DirectorySnapshotDiff: """Compares two directory snapshots and creates an object that represents the difference between the two snapshots. :param ref: The reference directory snapshot. :type ref: :class:`DirectorySnapshot` :param snapshot: The directory snapshot which will be compared with the reference snapshot. :type snapshot: :class:`DirectorySnapshot` :param ignore_device: A boolean indicating whether to ignore the device id or not. By default, a file may be uniquely identified by a combination of its first inode and its device id. The problem is that the device id may (or may not) change between system boots. This problem would cause the DirectorySnapshotDiff to think a file has been deleted and created again but it would be the exact same file. Set to True only if you are sure you will always use the same device. :type ignore_device: :class:`bool` """ def __init__( self, ref: DirectorySnapshot, snapshot: DirectorySnapshot, *, ignore_device: bool = False, ) -> None: created = snapshot.paths - ref.paths deleted = ref.paths - snapshot.paths if ignore_device: def get_inode(directory: DirectorySnapshot, full_path: bytes | str) -> int | tuple[int, int]: return directory.inode(full_path)[0] else: def get_inode(directory: DirectorySnapshot, full_path: bytes | str) -> int | tuple[int, int]: return directory.inode(full_path) # check that all unchanged paths have the same inode for path in ref.paths & snapshot.paths: if get_inode(ref, path) != get_inode(snapshot, path): created.add(path) deleted.add(path) # find moved paths moved: set[tuple[bytes | str, bytes | str]] = set() for path in set(deleted): inode = ref.inode(path) new_path = snapshot.path(inode) if new_path: # file is not deleted but moved deleted.remove(path) moved.add((path, new_path)) for path in set(created): inode = snapshot.inode(path) old_path = ref.path(inode) if old_path: created.remove(path) moved.add((old_path, path)) # find modified paths # first check paths that have not moved modified: set[bytes | str] = set() for path in ref.paths & snapshot.paths: if get_inode(ref, path) == get_inode(snapshot, path) and ( ref.mtime(path) != snapshot.mtime(path) or ref.size(path) != snapshot.size(path) ): modified.add(path) for old_path, new_path in moved: if ref.mtime(old_path) != snapshot.mtime(new_path) or ref.size(old_path) != snapshot.size(new_path): modified.add(old_path) self._dirs_created = [path for path in created if snapshot.isdir(path)] self._dirs_deleted = [path for path in deleted if ref.isdir(path)] self._dirs_modified = [path for path in modified if ref.isdir(path)] self._dirs_moved = [(frm, to) for (frm, to) in moved if ref.isdir(frm)] self._files_created = list(created - set(self._dirs_created)) self._files_deleted = list(deleted - set(self._dirs_deleted)) self._files_modified = list(modified - set(self._dirs_modified)) self._files_moved = list(moved - set(self._dirs_moved)) def __str__(self) -> str: return self.__repr__() def __repr__(self) -> str: fmt = ( "<{0} files(created={1}, deleted={2}, modified={3}, moved={4})," " folders(created={5}, deleted={6}, modified={7}, moved={8})>" ) return fmt.format( type(self).__name__, len(self._files_created), len(self._files_deleted), len(self._files_modified), len(self._files_moved), len(self._dirs_created), len(self._dirs_deleted), len(self._dirs_modified), len(self._dirs_moved), ) @property def files_created(self) -> list[bytes | str]: """List of files that were created.""" return self._files_created @property def files_deleted(self) -> list[bytes | str]: """List of files that were deleted.""" return self._files_deleted @property def files_modified(self) -> list[bytes | str]: """List of files that were modified.""" return self._files_modified @property def files_moved(self) -> list[tuple[bytes | str, bytes | str]]: """List of files that were moved. Each event is a two-tuple the first item of which is the path that has been renamed to the second item in the tuple. """ return self._files_moved @property def dirs_modified(self) -> list[bytes | str]: """List of directories that were modified.""" return self._dirs_modified @property def dirs_moved(self) -> list[tuple[bytes | str, bytes | str]]: """List of directories that were moved. Each event is a two-tuple the first item of which is the path that has been renamed to the second item in the tuple. """ return self._dirs_moved @property def dirs_deleted(self) -> list[bytes | str]: """List of directories that were deleted.""" return self._dirs_deleted @property def dirs_created(self) -> list[bytes | str]: """List of directories that were created.""" return self._dirs_created class ContextManager: """Context manager that creates two directory snapshots and a diff object that represents the difference between the two snapshots. :param path: The directory path for which a snapshot should be taken. :type path: ``str`` :param recursive: ``True`` if the entire directory tree should be included in the snapshot; ``False`` otherwise. :type recursive: ``bool`` :param stat: Use custom stat function that returns a stat structure for path. Currently only st_dev, st_ino, st_mode and st_mtime are needed. A function taking a ``path`` as argument which will be called for every entry in the directory tree. :param listdir: Use custom listdir function. For details see ``os.scandir``. :param ignore_device: A boolean indicating whether to ignore the device id or not. By default, a file may be uniquely identified by a combination of its first inode and its device id. The problem is that the device id may (or may not) change between system boots. This problem would cause the DirectorySnapshotDiff to think a file has been deleted and created again but it would be the exact same file. Set to True only if you are sure you will always use the same device. :type ignore_device: :class:`bool` """ def __init__( self, path: str, *, recursive: bool = True, stat: Callable[[str], os.stat_result] = os.stat, listdir: Callable[[str | None], Iterator[os.DirEntry]] = os.scandir, ignore_device: bool = False, ) -> None: self.path = path self.recursive = recursive self.stat = stat self.listdir = listdir self.ignore_device = ignore_device def __enter__(self) -> None: self.pre_snapshot = self.get_snapshot() def __exit__(self, *args: object) -> None: self.post_snapshot = self.get_snapshot() self.diff = DirectorySnapshotDiff( self.pre_snapshot, self.post_snapshot, ignore_device=self.ignore_device, ) def get_snapshot(self) -> DirectorySnapshot: return DirectorySnapshot( path=self.path, recursive=self.recursive, stat=self.stat, listdir=self.listdir, ) class DirectorySnapshot: """A snapshot of stat information of files in a directory. :param path: The directory path for which a snapshot should be taken. :type path: ``str`` :param recursive: ``True`` if the entire directory tree should be included in the snapshot; ``False`` otherwise. :type recursive: ``bool`` :param stat: Use custom stat function that returns a stat structure for path. Currently only st_dev, st_ino, st_mode and st_mtime are needed. A function taking a ``path`` as argument which will be called for every entry in the directory tree. :param listdir: Use custom listdir function. For details see ``os.scandir``. """ def __init__( self, path: str, *, recursive: bool = True, stat: Callable[[str], os.stat_result] = os.stat, listdir: Callable[[str | None], Iterator[os.DirEntry]] = os.scandir, ) -> None: self.recursive = recursive self.stat = stat self.listdir = listdir self._stat_info: dict[bytes | str, os.stat_result] = {} self._inode_to_path: dict[tuple[int, int], bytes | str] = {} st = self.stat(path) self._stat_info[path] = st self._inode_to_path[(st.st_ino, st.st_dev)] = path for p, st in self.walk(path): i = (st.st_ino, st.st_dev) self._inode_to_path[i] = p self._stat_info[p] = st def walk(self, root: str) -> Iterator[tuple[str, os.stat_result]]: try: paths = [os.path.join(root, entry.name) for entry in self.listdir(root)] except OSError as e: # Directory may have been deleted between finding it in the directory # list of its parent and trying to delete its contents. If this # happens we treat it as empty. Likewise if the directory was replaced # with a file of the same name (less likely, but possible). if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL): return else: raise entries = [] for p in paths: with contextlib.suppress(OSError): entry = (p, self.stat(p)) entries.append(entry) yield entry if self.recursive: for path, st in entries: with contextlib.suppress(PermissionError): if S_ISDIR(st.st_mode): yield from self.walk(path) @property def paths(self) -> set[bytes | str]: """Set of file/directory paths in the snapshot.""" return set(self._stat_info.keys()) def path(self, uid: tuple[int, int]) -> bytes | str | None: """Returns path for id. None if id is unknown to this snapshot.""" return self._inode_to_path.get(uid) def inode(self, path: bytes | str) -> tuple[int, int]: """Returns an id for path.""" st = self._stat_info[path] return (st.st_ino, st.st_dev) def isdir(self, path: bytes | str) -> bool: return S_ISDIR(self._stat_info[path].st_mode) def mtime(self, path: bytes | str) -> float: return self._stat_info[path].st_mtime def size(self, path: bytes | str) -> int: return self._stat_info[path].st_size def stat_info(self, path: bytes | str) -> os.stat_result: """Returns a stat information object for the specified path from the snapshot. Attached information is subject to change. Do not use unless you specify `stat` in constructor. Use :func:`inode`, :func:`mtime`, :func:`isdir` instead. :param path: The path for which stat information should be obtained from a snapshot. """ return self._stat_info[path] def __sub__(self, previous_dirsnap: DirectorySnapshot) -> DirectorySnapshotDiff: """Allow subtracting a DirectorySnapshot object instance from another. :returns: A :class:`DirectorySnapshotDiff` object. """ return DirectorySnapshotDiff(previous_dirsnap, self) def __str__(self) -> str: return self.__repr__() def __repr__(self) -> str: return str(self._stat_info) class EmptyDirectorySnapshot(DirectorySnapshot): """Class to implement an empty snapshot. This is used together with DirectorySnapshot and DirectorySnapshotDiff in order to get all the files/folders in the directory as created. """ def __init__(self) -> None: pass @staticmethod def path(_: Any) -> None: """Mock up method to return the path of the received inode. As the snapshot is intended to be empty, it always returns None. :returns: None. """ return @property def paths(self) -> set: """Mock up method to return a set of file/directory paths in the snapshot. As the snapshot is intended to be empty, it always returns an empty set. :returns: An empty set. """ return set()