425 lines
14 KiB
Python
425 lines
14 KiB
Python
|
""":module: watchdog.utils.dirsnapshot
|
||
|
:synopsis: Directory snapshots and comparison.
:author: yesudeep@google.com (Yesudeep Mangalapilly)
:author: contact@tiger-222.fr (Mickaël Schoentgen)

.. ADMONITION:: Where are the moved events? They "disappeared"

        This implementation does not take partition boundaries
        into consideration. It will only work when the directory
        tree is entirely on the same file system. More specifically,
        any part of the code that depends on inode numbers can
        break if partition boundaries are crossed. In these cases,
        the snapshot diff will represent file/directory movement as
        created and deleted events.

Classes
-------
.. autoclass:: DirectorySnapshot
   :members:
   :show-inheritance:

.. autoclass:: DirectorySnapshotDiff
   :members:
   :show-inheritance:

.. autoclass:: EmptyDirectorySnapshot
   :members:
   :show-inheritance:

"""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
import contextlib
|
||
|
import errno
|
||
|
import os
|
||
|
from stat import S_ISDIR
|
||
|
from typing import TYPE_CHECKING
|
||
|
|
||
|
if TYPE_CHECKING:
|
||
|
from collections.abc import Iterator
|
||
|
from typing import Any, Callable
|
||
|
|
||
|
|
||
|
class DirectorySnapshotDiff:
    """Compare two directory snapshots and record what changed between them.

    Every path is classified as created, deleted, moved or modified, and the
    results are split into files and directories. They are exposed through the
    read-only ``files_*`` and ``dirs_*`` properties; the lists are built from
    sets, so their order is not defined.

    :param ref:
        The reference directory snapshot.
    :type ref:
        :class:`DirectorySnapshot`
    :param snapshot:
        The directory snapshot which will be compared
        with the reference snapshot.
    :type snapshot:
        :class:`DirectorySnapshot`
    :param ignore_device:
        A boolean indicating whether to ignore the device id or not.
        By default, a file is identified by the combination of its inode
        number and its device id. The problem is that the device id may (or
        may not) change between system boots. This would cause the
        DirectorySnapshotDiff to think a file has been deleted and created
        again although it is the exact same file.
        Set to True only if you are sure you will always use the same device.
    :type ignore_device:
        :class:`bool`
    """

    def __init__(
        self,
        ref: DirectorySnapshot,
        snapshot: DirectorySnapshot,
        *,
        ignore_device: bool = False,
    ) -> None:
        # ``paths`` builds a new set on every access, so read it once per snapshot.
        ref_paths = ref.paths
        snapshot_paths = snapshot.paths
        created = snapshot_paths - ref_paths
        deleted = ref_paths - snapshot_paths
        common = ref_paths & snapshot_paths

        def get_inode(directory: DirectorySnapshot, full_path: bytes | str) -> int | tuple[int, int]:
            # With ``ignore_device`` only the inode number (first element) is compared.
            uid = directory.inode(full_path)
            return uid[0] if ignore_device else uid

        # A path present in both snapshots but backed by a different inode was
        # replaced: report it as both deleted and created.
        replaced = {path for path in common if get_inode(ref, path) != get_inode(snapshot, path)}
        created |= replaced
        deleted |= replaced

        # Find moved paths: an inode that is known to both snapshots under
        # different paths.
        # NOTE: these lookups use the full (inode, device) pair even when
        # ``ignore_device`` is set.
        moved: set[tuple[bytes | str, bytes | str]] = set()
        for path in tuple(deleted):
            new_path = snapshot.path(ref.inode(path))
            if new_path:
                # The entry is not deleted but moved.
                deleted.remove(path)
                moved.add((path, new_path))

        for path in tuple(created):
            old_path = ref.path(snapshot.inode(path))
            if old_path:
                created.remove(path)
                moved.add((old_path, path))

        def changed(old_path: bytes | str, new_path: bytes | str) -> bool:
            # An entry counts as modified when its mtime or its size differs.
            return ref.mtime(old_path) != snapshot.mtime(new_path) or ref.size(old_path) != snapshot.size(new_path)

        # Find modified paths: first the ones that kept both path and inode ...
        modified: set[bytes | str] = {path for path in common - replaced if changed(path, path)}
        # ... then the moved ones, which are reported under their old path.
        modified.update(old_path for old_path, new_path in moved if changed(old_path, new_path))

        self._dirs_created = [path for path in created if snapshot.isdir(path)]
        self._dirs_deleted = [path for path in deleted if ref.isdir(path)]
        self._dirs_modified = [path for path in modified if ref.isdir(path)]
        self._dirs_moved = [(frm, to) for (frm, to) in moved if ref.isdir(frm)]

        # Whatever is not a directory is reported as a file.
        self._files_created = list(created - set(self._dirs_created))
        self._files_deleted = list(deleted - set(self._dirs_deleted))
        self._files_modified = list(modified - set(self._dirs_modified))
        self._files_moved = list(moved - set(self._dirs_moved))

    def __str__(self) -> str:
        return self.__repr__()

    def __repr__(self) -> str:
        """Return a summary holding the number of entries in each category."""
        return (
            f"<{type(self).__name__} files("
            f"created={len(self._files_created)}, "
            f"deleted={len(self._files_deleted)}, "
            f"modified={len(self._files_modified)}, "
            f"moved={len(self._files_moved)}), "
            f"folders("
            f"created={len(self._dirs_created)}, "
            f"deleted={len(self._dirs_deleted)}, "
            f"modified={len(self._dirs_modified)}, "
            f"moved={len(self._dirs_moved)})>"
        )

    @property
    def files_created(self) -> list[bytes | str]:
        """List of files that were created."""
        return self._files_created

    @property
    def files_deleted(self) -> list[bytes | str]:
        """List of files that were deleted."""
        return self._files_deleted

    @property
    def files_modified(self) -> list[bytes | str]:
        """List of files that were modified."""
        return self._files_modified

    @property
    def files_moved(self) -> list[tuple[bytes | str, bytes | str]]:
        """List of files that were moved.

        Each event is a two-tuple the first item of which is the path
        that has been renamed to the second item in the tuple.
        """
        return self._files_moved

    @property
    def dirs_modified(self) -> list[bytes | str]:
        """List of directories that were modified."""
        return self._dirs_modified

    @property
    def dirs_moved(self) -> list[tuple[bytes | str, bytes | str]]:
        """List of directories that were moved.

        Each event is a two-tuple the first item of which is the path
        that has been renamed to the second item in the tuple.
        """
        return self._dirs_moved

    @property
    def dirs_deleted(self) -> list[bytes | str]:
        """List of directories that were deleted."""
        return self._dirs_deleted

    @property
    def dirs_created(self) -> list[bytes | str]:
        """List of directories that were created."""
        return self._dirs_created

    class ContextManager:
        """Context manager that creates two directory snapshots and a
        diff object that represents the difference between the two snapshots.

        The first snapshot is taken when the ``with`` block is entered and
        stored as ``pre_snapshot``; the second one is taken when the block is
        left and stored as ``post_snapshot``. The comparison of both is then
        available as ``diff``. Entering the block returns the context manager
        itself, so it can be bound with ``as``.

        :param path:
            The directory path for which a snapshot should be taken.
        :type path:
            ``str``
        :param recursive:
            ``True`` if the entire directory tree should be included in the
            snapshot; ``False`` otherwise.
        :type recursive:
            ``bool``
        :param stat:
            Use custom stat function that returns a stat structure for path.
            Currently only st_dev, st_ino, st_mode, st_mtime and st_size are
            needed.

            A function taking a ``path`` as argument which will be called
            for every entry in the directory tree.
        :param listdir:
            Use custom listdir function. For details see ``os.scandir``.
        :param ignore_device:
            A boolean indicating whether to ignore the device id or not.
            By default, a file is identified by the combination of its inode
            number and its device id. The problem is that the device id may
            (or may not) change between system boots. This would cause the
            DirectorySnapshotDiff to think a file has been deleted and created
            again although it is the exact same file.
            Set to True only if you are sure you will always use the same device.
        :type ignore_device:
            :class:`bool`
        """

        def __init__(
            self,
            path: str,
            *,
            recursive: bool = True,
            stat: Callable[[str], os.stat_result] = os.stat,
            listdir: Callable[[str | None], Iterator[os.DirEntry]] = os.scandir,
            ignore_device: bool = False,
        ) -> None:
            self.path = path
            self.recursive = recursive
            self.stat = stat
            self.listdir = listdir
            self.ignore_device = ignore_device

        def __enter__(self) -> DirectorySnapshotDiff.ContextManager:
            """Take the reference snapshot and return this context manager."""
            self.pre_snapshot = self.get_snapshot()
            # Returning self makes ``with ... as cm`` usable to reach ``cm.diff``.
            return self

        def __exit__(self, *args: object) -> None:
            """Take the second snapshot and compute ``diff``.

            Nothing is returned, so an exception raised inside the ``with``
            block is not suppressed.
            """
            self.post_snapshot = self.get_snapshot()
            self.diff = DirectorySnapshotDiff(
                self.pre_snapshot,
                self.post_snapshot,
                ignore_device=self.ignore_device,
            )

        def get_snapshot(self) -> DirectorySnapshot:
            """Return a new snapshot of ``path`` using the configured options."""
            return DirectorySnapshot(
                path=self.path,
                recursive=self.recursive,
                stat=self.stat,
                listdir=self.listdir,
            )
|
||
|
|
||
|
|
||
|
class DirectorySnapshot:
    """Stat information for a directory and the entries below it, captured at
    construction time.

    :param path:
        The directory path for which a snapshot should be taken.
    :type path:
        ``str``
    :param recursive:
        ``True`` if the entire directory tree should be included in the
        snapshot; ``False`` otherwise.
    :type recursive:
        ``bool``
    :param stat:
        Use custom stat function that returns a stat structure for path.
        Currently only st_dev, st_ino, st_mode and st_mtime are needed.

        A function taking a ``path`` as argument which will be called
        for every entry in the directory tree.
    :param listdir:
        Use custom listdir function. For details see ``os.scandir``.
    """

    def __init__(
        self,
        path: str,
        *,
        recursive: bool = True,
        stat: Callable[[str], os.stat_result] = os.stat,
        listdir: Callable[[str | None], Iterator[os.DirEntry]] = os.scandir,
    ) -> None:
        self.recursive = recursive
        self.stat = stat
        self.listdir = listdir

        # Two lookup tables: path -> stat result and (inode, device) -> path.
        self._stat_info: dict[bytes | str, os.stat_result] = {}
        self._inode_to_path: dict[tuple[int, int], bytes | str] = {}

        # The root itself is part of the snapshot.
        root_stat = self.stat(path)
        self._stat_info[path] = root_stat
        self._inode_to_path[(root_stat.st_ino, root_stat.st_dev)] = path

        for entry_path, entry_stat in self.walk(path):
            key = (entry_stat.st_ino, entry_stat.st_dev)
            self._inode_to_path[key] = entry_path
            self._stat_info[entry_path] = entry_stat

    def walk(self, root: str) -> Iterator[tuple[str, os.stat_result]]:
        """Yield ``(path, stat result)`` for the entries of ``root``.

        All direct children are yielded first; when ``recursive`` is set the
        sub-directories are walked afterwards. Entries whose ``stat`` call
        raises :class:`OSError` are skipped.
        """
        try:
            children = [os.path.join(root, item.name) for item in self.listdir(root)]
        except OSError as exc:
            # The directory may have vanished, or have been replaced by a file
            # of the same name, after it was listed by its parent. In that case
            # it is treated as empty; any other error is passed on.
            if exc.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                raise
            return

        collected = []
        for child in children:
            with contextlib.suppress(OSError):
                record = (child, self.stat(child))
                collected.append(record)
                yield record

        if not self.recursive:
            return

        for child, child_stat in collected:
            with contextlib.suppress(PermissionError):
                if S_ISDIR(child_stat.st_mode):
                    yield from self.walk(child)

    @property
    def paths(self) -> set[bytes | str]:
        """Set of file/directory paths in the snapshot."""
        return set(self._stat_info)

    def path(self, uid: tuple[int, int]) -> bytes | str | None:
        """Return the path recorded for ``uid``, or None if it is unknown."""
        return self._inode_to_path.get(uid)

    def inode(self, path: bytes | str) -> tuple[int, int]:
        """Return the ``(st_ino, st_dev)`` pair recorded for ``path``."""
        info = self._stat_info[path]
        return (info.st_ino, info.st_dev)

    def isdir(self, path: bytes | str) -> bool:
        """Return whether the recorded mode of ``path`` is a directory."""
        info = self._stat_info[path]
        return S_ISDIR(info.st_mode)

    def mtime(self, path: bytes | str) -> float:
        """Return the recorded ``st_mtime`` of ``path``."""
        info = self._stat_info[path]
        return info.st_mtime

    def size(self, path: bytes | str) -> int:
        """Return the recorded ``st_size`` of ``path``."""
        info = self._stat_info[path]
        return info.st_size

    def stat_info(self, path: bytes | str) -> os.stat_result:
        """Return the stat object stored in the snapshot for ``path``.

        Attached information is subject to change. Do not use unless
        you specify `stat` in constructor. Use :func:`inode`, :func:`mtime`,
        :func:`isdir` instead.

        :param path:
            The path for which stat information should be obtained
            from a snapshot.
        """
        return self._stat_info[path]

    def __sub__(self, previous_dirsnap: DirectorySnapshot) -> DirectorySnapshotDiff:
        """Support ``newer - older`` to compare two snapshots.

        :returns:
            A :class:`DirectorySnapshotDiff` object with ``previous_dirsnap``
            as the reference.
        """
        return DirectorySnapshotDiff(previous_dirsnap, self)

    def __str__(self) -> str:
        return self.__repr__()

    def __repr__(self) -> str:
        """Return the string form of the path -> stat mapping."""
        return str(self._stat_info)
|
||
|
|
||
|
|
||
|
class EmptyDirectorySnapshot(DirectorySnapshot):
    """Class to implement an empty snapshot. This is used together with
    DirectorySnapshot and DirectorySnapshotDiff in order to get all the files/folders
    in the directory as created.
    """

    def __init__(self) -> None:
        # The parent constructor is deliberately not called because it stats
        # and walks a real path. The lookup tables it would create are still
        # set up (empty), so that inherited accessors such as ``__repr__`` or
        # ``stat_info`` work instead of failing with AttributeError.
        self._stat_info: dict[bytes | str, os.stat_result] = {}
        self._inode_to_path: dict[tuple[int, int], bytes | str] = {}

    @staticmethod
    def path(_: Any) -> None:
        """Mock up method to return the path of the received inode. As the snapshot
        is intended to be empty, it always returns None.

        :returns:
            None.
        """
        return None

    @property
    def paths(self) -> set:
        """Mock up method to return a set of file/directory paths in the snapshot. As
        the snapshot is intended to be empty, it always returns an empty set.

        :returns:
            An empty set.
        """
        return set()
|