Source code for filecache.file_cache_path

##########################################################################################
# filecache/file_cache_path.py
##########################################################################################

from __future__ import annotations

import contextlib
import functools
import os
from pathlib import Path
import re
import sys
from typing import (cast,
                    Any,
                    Callable,
                    Generator,
                    Iterator,
                    IO,
                    Optional,
                    TYPE_CHECKING)

if TYPE_CHECKING:  # pragma: no cover
    from .file_cache import FileCache  # Circular import

from .file_cache_types import (StrOrPathOrSeqType,
                               UrlToPathFuncOrSeqType,
                               UrlToUrlFuncOrSeqType)


# Module-wide fallback FileCache. It is used when an FCPath is created without specifying
# a particular FileCache and the FCPath is actually used to perform an operation that
# needs one. It starts out as None and is created on first use by the
# FCPath.filecache property.
_DEFAULT_FILECACHE: Optional[FileCache] = None



[docs]
class FCPath:
    """Rewrite of the Python pathlib.Path class that supports URLs and FileCache.

    This class provides a simpler way to abstract away remote access in a FileCache by
    emulating the Python pathlib.Path class. At the same time, it can collect common
    parameters (`anonymous`, `lock_timeout`, `nthreads`) into a single location so that
    they do not have to be specified on every method call.
    """

    _filecache: Optional["FileCache"]
    _anonymous: Optional[bool]
    _lock_timeout: Optional[int]
    _nthreads: Optional[int]
    _url_to_url: Optional[UrlToUrlFuncOrSeqType]
    _url_to_path: Optional[UrlToPathFuncOrSeqType]


[docs]
    def __init__(self,
                 *paths: str | Path | FCPath | None,
                 filecache: Optional["FileCache"] = None,
                 anonymous: Optional[bool] = None,
                 lock_timeout: Optional[int] = None,
                 nthreads: Optional[int] = None,
                 url_to_url: Optional[UrlToUrlFuncOrSeqType] = None,
                 url_to_path: Optional[UrlToPathFuncOrSeqType] = None,
                 copy_from: Optional[FCPath] = None
                 ):
        """Initialization for the FCPath class.

        Parameters:
            paths: The path(s). These may be absolute or relative paths. They are joined
                together to form a final path. File operations can only be performed on
                absolute paths.
            file_cache: The :class:`FileCache` in which to store files retrieved from this
                path. If not specified, the default global :class:`FileCache` will be
                used.
            anonymous: If True, access cloud resources without specifying credentials. If
                False, credentials must be initialized in the program's environment. If
                None, use the default setting for the associated :class:`FileCache`
                instance.
            lock_timeout: How long to wait, in seconds, if another process is marked as
                retrieving the file before raising an exception. 0 means to not wait at
                all. A negative value means to never time out. None means to use the
                default value for the associated :class:`FileCache` instance.
            nthreads: The maximum number of threads to use when doing multiple-file
                retrieval or upload. If None, use the default value for the associated
                :class:`FileCache` instance.
            url_to_url: The function (or list of functions) that is used to translate URLs
                into URLs. A user-specified translator function takes three arguments::

                    func(scheme: str, remote: str, path: str) -> str

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, and `path` is the rest of the URL. If the translator wants to
                override the default translation, it can return a new complete URL as a
                string. Otherwise, it returns None. If more than one translator is
                specified, they are called in order until one returns a URL, or it falls
                through to the default.

                If None, use the default translators for the associated :class:`FileCache`
                instance.
            url_to_path: The function (or list of functions) that is used to translate
                URLs into local paths. By default, :class:`FileCache` uses a directory
                hierarchy consisting of ``<cache_dir>/<cache_name>/<source>/<path>``,
                where ``source`` is the URL prefix converted to a filesystem-friendly
                format (e.g. ``gs://bucket`` is converted to ``gs_bucket``). A
                user-specified translator function takes five arguments::

                    func(scheme: str, remote: str, path: str, cache_dir: Path,
                         cache_subdir: str) -> str | Path

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, `path` is the rest of the URL, `cache_dir` is the top-level
                directory of the cache (``<cache_dir>/<cache_name>``), and `cache_subdir`
                is the subdirectory specific to this scheme and remote. If the translator
                wants to override the default translation, it can return a Path.
                Otherwise, it returns None. If the returned Path is relative, if will be
                appended to `cache_dir`; if it is absolute, it will be used directly (be
                very careful with this, as it has the ability to access files outside of
                the cache directory). If more than one translator is specified, they are
                called in order until one returns a Path, or it falls through to the
                default. Note that `url_to_path` operates on the original URL, not the
                URL generated by a `url_to_url` translator.

                If None, use the default translators for the associated :class:`FileCache`
                instance.
            copy_from: An FCPath instance to copy internal parameters (`file_cache`,
                `anonymous`, `lock_timeout`, `nthreads`, `url_to_url`, and `url_to_path`)
                from. If specified, any values for these parameters in this constructor
                are ignored. Used internally and should not be used by external
                programmers.
        """

        self._path = self._join(*paths)

        if copy_from is None and len(paths) > 0 and isinstance(paths[0], FCPath):
            copy_from = paths[0]

        if copy_from is not None:
            self._filecache = copy_from._filecache
            self._anonymous = copy_from._anonymous
            self._lock_timeout = copy_from._lock_timeout
            self._nthreads = copy_from._nthreads
            self._url_to_url = copy_from._url_to_url
            self._url_to_path = copy_from._url_to_path
        else:
            self._filecache = filecache
            self._anonymous = anonymous
            self._lock_timeout = lock_timeout
            if nthreads is not None and (not isinstance(nthreads, int) or nthreads <= 0):
                raise ValueError(f'nthreads must be a positive integer, got {nthreads}')
            self._nthreads = nthreads
            self._url_to_url = url_to_url
            self._url_to_path = url_to_path

        self._pathlib: Optional[Path] = None
        self._upload_counter = 0
        self._download_counter = 0


    def _validate_nthreads(self,
                           nthreads: Optional[int]) -> int | None:
        if nthreads is not None and (not isinstance(nthreads, int) or nthreads <= 0):
            raise ValueError(f'nthreads must be a positive integer, got {nthreads}')
        if nthreads is None:
            nthreads = self._nthreads
        return nthreads

    @staticmethod
    def _split_parts(path: str | Path) -> tuple[str, str, str]:
        """Split a path into drive, root, and remainder of path.

        Backslashes are converted to forward slashes before parsing. Three kinds of
        drive are recognized: a Windows drive letter (``C:``, always returned in upper
        case), a UNC share (``//host/share``), and a URL prefix (``scheme://remote``)
        whose scheme prefix is one of ``FileCache.registered_scheme_prefixes()``.

        Parameters:
            path: The path to split.

        Returns:
            A tuple ``(drive, root, subpath)``. `drive` is the empty string if the path
            has no drive. `root` is ``'/'`` if the part of the path following the drive
            starts with a slash and the empty string otherwise. `subpath` is everything
            following the drive, *including* any leading slash, with trailing slashes
            removed; a subpath that consists only of slashes is returned as ``'/'``. A
            URL that consists of only a scheme and a remote is treated as absolute and
            yields a root and subpath of ``'/'``.

            Examples::

                'C:\\dir\\file.txt' -> ('C:', '/', '/dir/file.txt')
                'dir/sub/'          -> ('', '', 'dir/sub')
                '//host/share/dir'  -> ('//host/share', '/', '/dir')
                'C://'              -> ('C:', '/', '/')

        Raises:
            ValueError: If a UNC path is missing its host name or share name, or if a
                URL (other than ``file://``) is missing its remote name.
        """

        from .file_cache import FileCache  # Circular import avoidance

        path = str(path).replace('\\', '/')
        drive = ''
        root = ''
        if len(path) >= 2 and path[0].isalpha() and path[1] == ':':
            # Windows C:
            drive = path[0:2].upper()
            path = path[2:]

        elif path.startswith('//'):
            # UNC //host/share
            path2 = path[2:]

            try:
                idx = path2.index('/')
            except ValueError:
                # The failed str.index() lookup is an implementation detail, so it is
                # kept out of the traceback
                raise ValueError(
                    f'UNC path does not include share name {path!r}') from None
            if idx == 0:
                raise ValueError(f'UNC path does not include hostname {path!r}')

            try:
                idx2 = path2[idx+1:].index('/')
            except ValueError:
                # It's just a share name like //host/share
                drive = path
                path = ''
            else:
                # It's a share plus path like //host/share/path
                # We include the leading /
                if idx2 == 0:
                    raise ValueError(f'UNC path does not include share {path!r}')
                # 3 == len('//') + the slash between host and share
                drive = path[:idx+idx2+3]
                path = path[idx+idx2+3:]

        elif path.startswith(FileCache.registered_scheme_prefixes()):
            # Cloud
            idx = path.index('://')
            path2 = path[idx+3:]
            if path2 == '':
                raise ValueError(f'URI does not include remote name {path!r}')
            try:
                idx2 = path2.index('/')
            except ValueError:
                # It's just a remote name like gs://bucket; we still make it absolute
                drive = path
                path = '/'
            else:
                # It's a remote name plus path like gs://bucket/path
                # We include the leading /
                # file:// URLs are allowed to have an empty remote (file:///path)
                if idx2 == 0 and not path.startswith('file://'):
                    raise ValueError(f'URI does not include remote name {path!r}')
                # 3 == len('://')
                drive = path[:idx+idx2+3]
                path = path[idx+idx2+3:]

        if path.startswith('/'):
            root = '/'

        if path != root:
            # Strip trailing slashes, but never strip the root itself away: a subpath
            # made only of slashes (as in 'C://') is still the root directory. Without
            # the fallback it would become '' while root stayed '/', and the path would
            # be treated as relative once rejoined with its drive.
            path = path.rstrip('/') or root

        return drive, root, path

    @staticmethod
    def _split(path: str) -> tuple[str, str]:
        """Split a path into head,tail similarly to os.path.split."""

        if path == '':
            return '', ''
        drive, root, subpath = FCPath._split_parts(path)
        if '/' not in subpath:
            return drive, subpath
        if root == '/' and subpath == root:
            return drive + '/', ''
        idx = subpath.rindex('/')
        if idx == 0:
            return drive + '/', subpath[idx+1:]
        return drive + subpath[:idx].rstrip('/'), subpath[idx+1:]

    @staticmethod
    def _is_absolute(path: str) -> bool:
        """Check if a path string is an absolute path.

        A path is absolute when the part following its drive (if any) starts with a
        slash, as reported by the root returned from :meth:`_split_parts`.
        """

        _, root, _ = FCPath._split_parts(path)
        return root == '/'

    @staticmethod
    def _join(*paths: str | Path | FCPath | None) -> str:
        """Join multiple strings together into a single path.

        Any time an absolute path is found in the path list, the new path starts
        over.
        """

        ret = ''
        for path in paths:
            if path is None:
                continue
            if not isinstance(path, (str, Path, FCPath)):
                raise TypeError(f'path {path!r} is not a str, Path, or FCPath')
            path = str(path)
            if not path:
                continue
            drive, root, subpath = FCPath._split_parts(path)
            while '//' in subpath:
                subpath = subpath.replace('//', '/')
            if root == '/':  # Absolute path - start over
                ret = ''
            if ret == '':
                ret = drive
            elif ret != '' and ret[-1] != '/' and subpath != '' and subpath[0] != '/':
                ret += '/'
            if not (subpath == '/' and '://' in drive):
                ret = ret + subpath

        return ret

    @staticmethod
    def _filename(path: str) -> str:
        """Return just the filename part of a path.

        This is whatever follows the final slash of the path once the drive has been
        removed, or everything after the drive if there is no slash.
        """

        subpath = FCPath._split_parts(path)[2]
        # rpartition yields the whole string as the last element when there is no slash
        return subpath.rpartition('/')[2]

    @property
    def _stack(self) -> tuple[str, list[str]]:
        """Split the path into a 2-tuple (anchor, parts).

        *anchor* is the uppermost parent of the path (equivalent to path.parents[-1]), and
        *parts* is a reversed list of parts following the anchor.
        """

        path = self._path
        parent, name = FCPath._split(path)
        names = []
        while path != parent:
            names.append(name)
            path = parent
            parent, name = FCPath._split(path)
        return path, names


[docs]
    def __str__(self) -> str:
        """Return this path as a string, exactly as stored after joining."""
        return self._path


    @property
    def path(self) -> str:
        """Return this path as a string.

        This is the same value that ``str()`` returns for this FCPath.
        """

        return self._path


[docs]
    def as_pathlib(self) -> Path:
        """Return this path as a pathlib Path object."""

        if self._pathlib is None:
            if not self.is_local():
                raise ValueError(f'Cannot convert {self} to pathlib.Path')
            self._pathlib = Path(self._path)
        return self._pathlib



[docs]
    def as_posix(self) -> str:
        """Return this FCPath as a POSIX path. This is a str using only forward slashes.

        Notes:
            Because URLs are not really supported in POSIX format, we just return the
            URL as-is, including any scheme and remote.

        Returns:
            This path as a POSIX path. This is the same string that ``str()`` returns
            for this FCPath.
        """

        return self._path


    @property
    def drive(self) -> str:
        """The drive associated with this FCPath.

        Notes:
            Examples:

                For a Windows path: '' or 'C:'

                For a UNC share: '//host/share'

                For a cloud resource: 'gs://bucket'
        """

        drive, _, _ = self._split_parts(self._path)
        return drive

    @property
    def root(self) -> str:
        """The root of this FCPath; '/' if absolute, '' otherwise."""

        _, root, _ = self._split_parts(self._path)
        return root

    @property
    def anchor(self) -> str:
        """The anchor of this FCPath, which is drive + root."""

        drive, root, _ = self._split_parts(self._path)
        return drive + root

    @property
    def suffix(self) -> str:
        """The final component's last suffix, if any, including the leading period.

        A leading period (as in ``.bashrc``) or a trailing period does not start a
        suffix; in those cases the result is the empty string.
        """

        filename = FCPath._filename(self._path)
        base, dot, extension = filename.rpartition('.')
        # There must be text on both sides of the final period
        if dot and base and extension:
            return dot + extension
        return ''

    @property
    def suffixes(self) -> list[str]:
        """A list of the final component's suffixes, including the leading periods.

        A filename that ends with a period has no suffixes. Leading periods are not
        treated as starting a suffix.
        """

        filename = FCPath._filename(self._path)
        if filename.endswith('.'):
            return []
        pieces = filename.lstrip('.').split('.')
        # The first piece is the stem; every later piece is a suffix
        return [f'.{piece}' for piece in pieces[1:]]

    @property
    def stem(self) -> str:
        """The final path component, minus its last suffix.

        If the final component has no suffix (see :attr:`suffix`), the whole component
        is returned.
        """

        filename = FCPath._filename(self._path)
        base, dot, extension = filename.rpartition('.')
        # Only strip when there is text on both sides of the final period
        return base if (dot and base and extension) else filename


[docs]
    def with_name(self, name: str) -> FCPath:
        """Return a new FCPath with the filename changed.

        Parameters:
            name: The new filename to replace the final path component with.

        Returns:
            A new FCPath with the final component replaced. The new FCPath will have
            the same parameters (`filecache`, etc.) as the source FCPath.

        Raises:
            ValueError: If `name` is empty, contains a drive or a root, or contains a
                path separator.
        """

        name_drive, name_root, name_rest = FCPath._split_parts(name)
        is_plain_name = (name_drive == '' and name_root == '' and name_rest != ''
                         and '/' not in name_rest)
        if not is_plain_name:
            raise ValueError(f"Invalid name {name!r}")

        drive, _, subpath = FCPath._split_parts(self._path)
        # Keep everything up to and including the final slash; both pieces are empty
        # when the subpath has no slash at all
        directory, slash, _ = subpath.rpartition('/')
        return FCPath(drive + directory + slash + name, copy_from=self)



[docs]
    def with_stem(self, stem: str) -> FCPath:
        """Return a new FCPath with the stem (the filename minus the suffix) changed.

        Parameters:
            stem: The new stem.

        Returns:
            A new FCPath with the final component's stem replaced. The new FCPath will
            have the same parameters (`filecache`, etc.) as the source FCPath.

        Raises:
            ValueError: If `stem` is empty while this path has a suffix, or if the
                resulting filename is rejected by :meth:`with_name`.
        """

        suffix = self.suffix
        if suffix and not stem:
            # If the suffix is non-empty, we can't make the stem empty.
            raise ValueError(f"{self!r} has a non-empty suffix")
        return self.with_name(stem + suffix if suffix else stem)



[docs]
    def with_suffix(self, suffix: str) -> FCPath:
        """Return a new FCPath with the file suffix changed.

        If the path has no suffix, add the given suffix. If the given suffix is an empty
        string, remove the suffix from the path.

        Parameters:
            suffix: The new suffix to use. If not empty, it must start with a period and
                contain at least one more character.

        Returns:
            A new FCPath with the final component's suffix replaced. The new FCPath will
            have the same parameters (`filecache`, etc.) as the source FCPath.

        Raises:
            ValueError: If this path has an empty stem or `suffix` is malformed.
        """

        stem = self.stem
        if not stem:
            # If the stem is empty, we can't make the suffix non-empty.
            raise ValueError(f"{self!r} has an empty name")
        if suffix and (not suffix.startswith('.') or len(suffix) <= 1):
            raise ValueError(f"Invalid suffix {suffix!r}")
        return self.with_name(stem + suffix)


    @property
    def parts(self) -> tuple[str, ...]:
        """An object providing sequence-like access to the components in the path."""

        anchor, parts = self._stack
        if anchor:
            parts.append(anchor)
        return tuple(reversed(parts))


[docs]
    def joinpath(self,
                 *pathsegments: str | Path | FCPath | None) -> FCPath:
        """Combine this path with additional paths.

        Parameters:
            pathsegments: One or more additional paths to join with this path.

        Returns:
            A new FCPath that is a combination of this path and the additional paths. The
            new FCPath will have the same parameters (`filecache`, etc.) as the source
            FCPath.
        """

        return FCPath(self._path, *pathsegments, copy_from=self)



[docs]
    def __truediv__(self,
                    other: str | Path | FCPath | None) -> FCPath:
        """Combine this path with an additional path.

        Parameters:
            other: The path to join with this path.

        Returns:
            A new FCPath that is a combination of this path and the other path. The new
            FCPath will have the same parameters (`filecache`, etc.) as the current
            FCPath.
        """

        return FCPath(self._path, other, copy_from=self)



[docs]
    def __rtruediv__(self, other: str | Path | FCPath) -> FCPath:
        """Combine an additional path with this path.

        Parameters:
            other: The path to join with this path.

        Returns:
            A new FCPath that is a combination of the other path and this path. The new
            FCPath will have the same parameters (`filecache`, etc.) as the other path if
            the other path is an FCPath; otherwise it will have the same parameters as
            the current FCPath.
        """

        if isinstance(other, FCPath):  # pragma: no cover
            # This shouldn't be possible to hit because __truediv__ will catch it
            return FCPath(other, self._path, copy_from=other)
        else:
            return FCPath(other, self._path, copy_from=self)



[docs]
    def splitpath(self, search_dir: str) -> tuple[FCPath, ...]:
        """Split the path into a list of FCPaths at each occurrence of search_dir.

        Parameters:
            search_dir: The directory to search for.

        Returns:
            A tuple of FCPaths, each of which is a segment of the path between instances
            of search_dir, not including the search_dir itself.
        """

        parts = self.parts
        indices = [i for i, part in enumerate(parts) if part == search_dir]
        indices = [-1] + indices + [len(parts)]
        return tuple(FCPath(*parts[i+1:j], copy_from=self)
                     for i, j in zip(indices[:-1], indices[1:]))



[docs]
    def __repr__(self) -> str:
        parts = [repr(self._path)]
        if self._filecache is not None:
            parts.append(f'filecache={self._filecache!r}')
        if self._anonymous is not None:
            parts.append(f'anonymous={self._anonymous!r}')
        if self._lock_timeout is not None:
            parts.append(f'lock_timeout={self._lock_timeout!r}')
        if self._nthreads is not None:
            parts.append(f'nthreads={self._nthreads!r}')
        if self._url_to_url is not None:
            parts.append(f'url_to_url={self._url_to_url!r}')
        if self._url_to_path is not None:
            parts.append(f'url_to_path={self._url_to_path!r}')
        return f'FCPath({", ".join(parts)})'



[docs]
    def __eq__(self, other: object) -> bool:
        if not isinstance(other, FCPath):
            return NotImplemented
        return self._path == other._path



[docs]
    def __lt__(self, other: object) -> bool:
        if not isinstance(other, FCPath):
            return NotImplemented
        return self._path < other._path



[docs]
    def __le__(self, other: object) -> bool:
        if not isinstance(other, FCPath):
            return NotImplemented
        return self._path <= other._path



[docs]
    def __gt__(self, other: object) -> bool:
        if not isinstance(other, FCPath):
            return NotImplemented
        return self._path > other._path



[docs]
    def __ge__(self, other: object) -> bool:
        if not isinstance(other, FCPath):
            return NotImplemented
        return self._path >= other._path


    @property
    def name(self) -> str:
        """The final component of the path."""

        return FCPath._split(self._path)[1]

    @property
    def parent(self) -> FCPath:
        """The logical parent of the path.

        The new FCPath will have the same parameters (`filecache`, etc.) as the original
        path.
        """

        parent = FCPath._split(self._path)[0]
        if self._path != parent:
            return FCPath(parent, copy_from=self)
        return self

    @property
    def parents(self) -> tuple[FCPath, ...]:
        """A sequence of this path's logical parents."""

        path = self._path
        parent = FCPath._split(path)[0]
        parents = []
        while path != parent:
            parents.append(FCPath(parent, copy_from=self))
            path = parent
            parent = FCPath._split(path)[0]
        return tuple(parents)


[docs]
    def is_absolute(self) -> bool:
        """True if the path is absolute.

        A path is absolute when the part following its drive (if any) starts with a
        slash.
        """

        return FCPath._is_absolute(self._path)



[docs]
    def as_absolute(self) -> FCPath:
        """Return the absolute version of this possibly-relative path.

        Returns:
            This FCPath itself if it is already absolute. Otherwise a new FCPath, with
            the same parameters (`filecache`, etc.) as this one, built by treating the
            path as a local pathlib path, expanding the user's home directory, and
            resolving it to an absolute path.

        Raises:
            ValueError: If the path is relative but cannot be converted to a pathlib
                Path (see :meth:`as_pathlib`).
        """

        if FCPath._is_absolute(self._path):
            return self
        resolved = self.as_pathlib().expanduser().absolute().resolve()
        return FCPath(resolved, copy_from=self)



[docs]
    def match(self,
              path_pattern: str | Path | FCPath) -> bool:
        """Return True if this path matches the given pattern.

        If the pattern is relative, matching is done from the right; otherwise, the entire
        path is matched. The recursive wildcard ``**`` is *not* supported by this method
        (it just acts like ``*``).

        See pathlib.Path.match for full documentation.

        Parameters:
            path_pattern: The pattern to match against.

        Returns:
            True if every component of the pattern matches the corresponding component
            of this path, counting from the right.

        Raises:
            ValueError: If the pattern has no components.
        """

        if isinstance(path_pattern, FCPath):
            pattern = path_pattern
        else:
            pattern = FCPath(path_pattern)

        # Both sequences are reversed so components pair up from the right
        own_parts = tuple(reversed(self.parts))
        wanted_parts = tuple(reversed(pattern.parts))
        if not wanted_parts:
            raise ValueError('empty pattern')
        if len(own_parts) < len(wanted_parts):
            return False
        if len(own_parts) > len(wanted_parts) and pattern.anchor:
            # An anchored pattern has to account for the entire path
            return False

        globber = _StringGlobber(self)
        return all(globber.compile(wanted)(own) is not None
                   for own, wanted in zip(own_parts, wanted_parts))



[docs]
    def full_match(self,
                   pattern: str | Path | FCPath) -> bool:
        """Return True if this path matches the given glob-style pattern.

        The pattern is matched against the entire path.

        See pathlib.Path.full_match for full documentation.

        Parameters:
            pattern: The pattern to match against. A pattern that is not already an
                FCPath is first converted to one.
        """

        as_fcpath = pattern if isinstance(pattern, FCPath) else FCPath(pattern)
        matcher = _StringGlobber(self, recursive=True).compile(str(as_fcpath))
        return matcher(self._path) is not None


    @property
    def filecache(self) -> "FileCache":
        """The FileCache associated with this path.

        If no FileCache was given when this FCPath was created, the module-wide
        default FileCache is returned; it is created the first time it is needed.
        """
        from .file_cache import FileCache
        global _DEFAULT_FILECACHE

        if self._filecache is not None:
            return self._filecache
        if _DEFAULT_FILECACHE is None:
            _DEFAULT_FILECACHE = FileCache()
        return _DEFAULT_FILECACHE

    def _make_paths_absolute(self,
                             sub_path: Optional[StrOrPathOrSeqType]) -> str | list[str]:
        """Join one or more sub-paths onto this path and make the results absolute.

        Parameters:
            sub_path: A single path, a list or tuple of paths, or None. Each one is
                joined onto this path; None yields this path itself.

        Returns:
            A single string, or a list of strings if `sub_path` was a list or tuple. A
            joined path that is not absolute is treated as a local path and is made
            absolute by expanding the user's home directory and resolving it.
        """

        def _absolute(one_path: Any) -> str:
            joined = FCPath._join(self._path, one_path)
            if FCPath._is_absolute(joined):
                return joined
            resolved = Path(joined).expanduser().absolute().resolve()
            # Round-trip through FCPath to get forward slashes
            return FCPath(resolved).as_posix()

        if isinstance(sub_path, (list, tuple)):
            return [_absolute(item) for item in sub_path]
        return _absolute(sub_path)


[docs]
    def get_local_path(self,
                       sub_path: Optional[StrOrPathOrSeqType] = None,
                       *,
                       create_parents: bool = True,
                       url_to_url: Optional[UrlToUrlFuncOrSeqType] = None,
                       url_to_path: Optional[UrlToPathFuncOrSeqType] = None,
                       ) -> Path | list[Path]:
        """Return the local path for the given sub_path relative to this path.

        Parameters:
            sub_path: The path of the file relative to this path. If not specified, this
                path is used. If `sub_path` is a list or tuple, all paths are processed.
                If the resulting derived path is not absolute, it is assumed to be a
                relative local path and is converted to an absolute path by expanding
                usernames and resolving links.
            create_parents: If True, create all parent directories. This is useful when
                getting the local path of a file that will be uploaded.
            url_to_url: The function (or list of functions) that is used to translate URLs
                into URLs. A user-specified translator function takes three arguments::

                    func(scheme: str, remote: str, path: str) -> str

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, and `path` is the rest of the URL. A translator that wants to
                override the default translation returns a new complete URL as a string;
                otherwise it returns None. When several translators are given, they are
                called in order until one returns a URL, or it falls through to the
                default.

                If None, use the default value given when this :class:`FCPath` was
                created.
            url_to_path: The function (or list of functions) that is used to translate
                URLs into local paths. By default, :class:`FileCache` uses a directory
                hierarchy consisting of ``<cache_dir>/<cache_name>/<source>/<path>``,
                where ``source`` is the URL prefix converted to a filesystem-friendly
                format (e.g. ``gs://bucket`` is converted to ``gs_bucket``). A
                user-specified translator function takes five arguments::

                    func(scheme: str, remote: str, path: str, cache_dir: Path,
                         cache_subdir: str) -> str | Path

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, `path` is the rest of the URL, `cache_dir` is the top-level
                directory of the cache (``<cache_dir>/<cache_name>``), and `cache_subdir`
                is the subdirectory specific to this scheme and remote. A translator
                that wants to override the default translation returns a Path; otherwise
                it returns None. If the returned Path is relative, it will be appended to
                `cache_dir`; if it is absolute, it will be used directly (be very careful
                with this, as it has the ability to access files outside of the cache
                directory). When several translators are given, they are called in order
                until one returns a Path, or it falls through to the default. Note that
                `url_to_path` operates on the original URL, not the URL generated by a
                `url_to_url` translator.

                If None, use the default value given when this :class:`FCPath` was
                created.

        Returns:
            The Path (or list of Paths) of the URL (possibly as mapped by the
            `url_to_url` translators) in the cache directory, or as specified by the
            `url_to_path` translators. The files do not have to exist because a Path could
            be used for writing a file to upload. To facilitate this, a side effect of
            this call (if `create_parents` is True) is that the complete parent directory
            structure will be created for each returned Path.
        """

        absolute_paths = self._make_paths_absolute(sub_path)

        # Per-call translators take precedence over the ones stored on this FCPath
        return self.filecache.get_local_path(
            cast(StrOrPathOrSeqType, absolute_paths),
            anonymous=self._anonymous,
            create_parents=create_parents,
            url_to_url=url_to_url or self._url_to_url,
            url_to_path=url_to_path or self._url_to_path)



[docs]
    def exists(self,
               sub_path: Optional[StrOrPathOrSeqType] = None,
               *,
               bypass_cache: bool = False,
               nthreads: Optional[int] = None,
               url_to_url: Optional[UrlToUrlFuncOrSeqType] = None,
               url_to_path: Optional[UrlToPathFuncOrSeqType] = None
               ) -> bool | list[bool]:
        """Report whether a file exists, without downloading it.

        Parameters:
            sub_path: The path of the file relative to this path; this path itself is
                used when omitted. A derived path that is not absolute is assumed to be
                a relative local path and is made absolute by expanding usernames and
                resolving links.
            bypass_cache: If False, look for the file in the local cache first and only
                ask the remote server when it is not found there. If True, only check
                on the remote server.
            nthreads: The maximum number of threads to use when doing multiple-file
                retrieval or upload. If None, use the default value given when this
                :class:`FCPath` was created.
            url_to_url: One translator function, or a list of them, used to map URLs
                onto other URLs. Each translator is called as::

                    func(scheme: str, remote: str, path: str) -> str

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), and `path` the remainder of the URL. A translator returns a new
                complete URL as a string to override the default translation, or None
                to decline. Multiple translators are tried in order until one returns
                a URL; if none does, the default applies.

                If None, the `url_to_url` value held by this :class:`FCPath` is passed
                along instead; when that is also None the default translators of the
                associated :class:`FileCache` instance are used.
            url_to_path: One translator function, or a list of them, used to map URLs
                onto local paths. Without a translator, :class:`FileCache` lays files
                out as ``<cache_dir>/<cache_name>/<source>/<path>``, where ``source``
                is the URL prefix in a filesystem-friendly form (e.g. ``gs://bucket``
                becomes ``gs_bucket``). Each translator is called as::

                    func(scheme: str, remote: str, path: str, cache_dir: Path,
                         cache_subdir: str) -> str | Path

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), `path` the remainder of the URL, `cache_dir` the top-level
                directory of the cache (``<cache_dir>/<cache_name>``), and
                `cache_subdir` the subdirectory specific to this scheme and remote. A
                translator returns a Path to override the default translation, or None
                to decline. A relative Path is appended to `cache_dir`; an absolute
                Path is used as-is (be very careful with this, since it can reach
                files outside of the cache directory). Multiple translators are tried
                in order until one returns a Path; if none does, the default applies.
                Note that `url_to_path` sees the original URL, not the URL produced by
                a `url_to_url` translator.

                If None, use the default value given when this :class:`FCPath` was
                created.

        Returns:
            True if the file exists; note that an existing file may still not be
            downloadable due to permissions. False if the file does not exist, which
            includes bad bucket or webserver names, lack of permission to examine a
            bucket's contents, etc. If `sub_path` was a list or tuple, a list of such
            booleans is returned instead.
        """

        thread_limit = self._validate_nthreads(nthreads)
        target = cast(StrOrPathOrSeqType, self._make_paths_absolute(sub_path))

        # Per-call translators take precedence over the ones stored on this FCPath
        if not url_to_url:
            url_to_url = self._url_to_url
        if not url_to_path:
            url_to_path = self._url_to_path

        return self.filecache.exists(target,
                                     bypass_cache=bypass_cache,
                                     nthreads=thread_limit,
                                     anonymous=self._anonymous,
                                     url_to_url=url_to_url,
                                     url_to_path=url_to_path)



[docs]
    def modification_time(self,
                          sub_path: Optional[StrOrPathOrSeqType] = None,
                          *,
                          bypass_cache: bool = False,
                          nthreads: Optional[int] = None,
                          exception_on_fail: bool = True,
                          url_to_url: Optional[UrlToUrlFuncOrSeqType] = None
                          ) -> float | None | Exception | list[float | None | Exception]:
        """Return the modification time of a remote file as a Unix timestamp.

        Parameters:
            sub_path: The path of the file relative to this path; this path itself is
                used when omitted. A list or tuple causes every URL to be checked,
                which may be more efficient because the checks can run in parallel. A
                derived path that is not absolute is assumed to be a relative local
                path and is made absolute by expanding usernames and resolving links.
            bypass_cache: If False, look up the modification time in the metadata cache
                first (if enabled) and only ask the remote server when it is not found
                there. If True, always ask the remote server directly.
            nthreads: The maximum number of threads to use. If None, use the default
                value given when this :class:`FCPath` was created.
            exception_on_fail: If True, a FileNotFound exception is raised if any file
                does not exist. If False, the function returns normally and each failed
                check is reported as the Exception that caused it, in place of the
                modification time.
            url_to_url: One translator function, or a list of them, used to map URLs
                onto other URLs. Each translator is called as::

                    func(scheme: str, remote: str, path: str) -> str

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), and `path` the remainder of the URL. A translator returns a new
                complete URL as a string to override the default translation, or None
                to decline. Multiple translators are tried in order until one returns
                a URL; if none does, the default applies.

                If None, the `url_to_url` value held by this :class:`FCPath` is passed
                along instead; when that is also None the default translators of the
                associated :class:`FileCache` instance are used.

        Returns:
            The modification time as a Unix timestamp if the file exists and the time
            can be retrieved, None otherwise. If `sub_path` was a list or tuple, a list
            of modification times in the same order is returned instead. The value
            always describes the file on the remote source, even when a local copy
            exists; use the normal ``stat`` function for the modification time of the
            local copy. If `exception_on_fail` is False, any entry may be an Exception
            when that file does not exist or its modification time cannot be
            retrieved.

        Raises:
            FileNotFoundError: If a file does not exist.
        """

        thread_limit = self._validate_nthreads(nthreads)
        target = cast(StrOrPathOrSeqType, self._make_paths_absolute(sub_path))

        # A per-call translator takes precedence over the one stored on this FCPath
        if not url_to_url:
            url_to_url = self._url_to_url

        return self.filecache.modification_time(target,
                                                anonymous=self._anonymous,
                                                bypass_cache=bypass_cache,
                                                nthreads=thread_limit,
                                                exception_on_fail=exception_on_fail,
                                                url_to_url=url_to_url)



[docs]
    def is_dir(self,
               sub_path: Optional[StrOrPathOrSeqType] = None,
               *,
               nthreads: Optional[int] = None,
               exception_on_fail: bool = True,
               url_to_url: Optional[UrlToUrlFuncOrSeqType] = None
               ) -> bool | Exception | list[bool | Exception]:
        """Report whether a path refers to a directory.

        Parameters:
            sub_path: The path of the directory relative to this path; this path itself
                is used when omitted. A list or tuple causes every path to be checked.
                A derived path that is not absolute is assumed to be a relative local
                path and is made absolute by expanding usernames and resolving links.
            nthreads: The maximum number of threads to use for multiple paths.
            exception_on_fail: If True, a FileNotFound exception is raised if any path
                cannot be checked. If False, the function returns normally and each
                failed check is reported as the Exception that caused it.
            url_to_url: One translator function, or a list of them, used to map URLs
                onto other URLs. Each translator is called as::

                    func(scheme: str, remote: str, path: str) -> str

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), and `path` the remainder of the URL. A translator returns a new
                complete URL as a string to override the default translation, or None
                to decline. Multiple translators are tried in order until one returns
                a URL; if none does, the default applies.

                If None, the `url_to_url` value held by this :class:`FCPath` is passed
                along instead; when that is also None the default translators of the
                associated :class:`FileCache` instance are used.

        Returns:
            True if the path represents a directory, False otherwise. If `sub_path` was
            a list or tuple, a list of booleans or exceptions in the same order is
            returned instead. If `exception_on_fail` is False, any result may be an
            Exception when that path cannot be checked.

        Raises:
            FileNotFoundError: If a path cannot be checked.

        Notes:
            Unlike ``os.path.isdir`` or ``pathlib.Path.is_dir``, this method raises an
            exception if the URL does not exist instead of returning ``False``. This
            keeps remote connection errors from being masked by the return value.
        """

        thread_limit = self._validate_nthreads(nthreads)
        target = cast(StrOrPathOrSeqType, self._make_paths_absolute(sub_path))

        # A per-call translator takes precedence over the one stored on this FCPath
        if not url_to_url:
            url_to_url = self._url_to_url

        return self.filecache.is_dir(target,
                                     anonymous=self._anonymous,
                                     nthreads=thread_limit,
                                     exception_on_fail=exception_on_fail,
                                     url_to_url=url_to_url)



[docs]
    def retrieve(self,
                 sub_path: Optional[StrOrPathOrSeqType] = None,
                 *,
                 lock_timeout: Optional[int] = None,
                 nthreads: Optional[int] = None,
                 exception_on_fail: bool = True,
                 url_to_url: Optional[UrlToUrlFuncOrSeqType] = None,
                 url_to_path: Optional[UrlToPathFuncOrSeqType] = None
                 ) -> Path | Exception | list[Path | Exception]:
        """Fetch the file(s) at the given sub_path and store them in the file cache.

        Parameters:
            sub_path: The path of the file relative to this path; this path itself is
                used when omitted. A list or tuple causes the complete list of files to
                be retrieved which, depending on the storage location, may be more
                efficient because files can be downloaded in parallel. A derived path
                that is not absolute is assumed to be a relative local path and is made
                absolute by expanding usernames and resolving links.
            lock_timeout: How long to wait, in seconds, if another process is marked as
                retrieving the file before raising an exception. 0 means to not wait at
                all. A negative value means to never time out. None means to use the
                default value given when this :class:`FCPath` was created.
            nthreads: The maximum number of threads to use when doing multiple-file
                retrieval or upload. If None, use the default value given when this
                :class:`FCPath` was created.
            exception_on_fail: If True, a FileNotFound exception is raised if any file
                does not exist or a download fails, and a TimeoutError is raised if any
                attempt to acquire a lock or to wait for another process to download a
                file fails. If False, the function returns normally and each failed
                download is reported as the Exception that caused it, in place of the
                returned Path.
            url_to_url: One translator function, or a list of them, used to map URLs
                onto other URLs. Each translator is called as::

                    func(scheme: str, remote: str, path: str) -> str

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), and `path` the remainder of the URL. A translator returns a new
                complete URL as a string to override the default translation, or None
                to decline. Multiple translators are tried in order until one returns
                a URL; if none does, the default applies.

                If None, the `url_to_url` value held by this :class:`FCPath` is passed
                along instead; when that is also None the default translators of the
                associated :class:`FileCache` instance are used.
            url_to_path: One translator function, or a list of them, used to map URLs
                onto local paths. Without a translator, :class:`FileCache` lays files
                out as ``<cache_dir>/<cache_name>/<source>/<path>``, where ``source``
                is the URL prefix in a filesystem-friendly form (e.g. ``gs://bucket``
                becomes ``gs_bucket``). Each translator is called as::

                    func(scheme: str, remote: str, path: str, cache_dir: Path,
                         cache_subdir: str) -> str | Path

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), `path` the remainder of the URL, `cache_dir` the top-level
                directory of the cache (``<cache_dir>/<cache_name>``), and
                `cache_subdir` the subdirectory specific to this scheme and remote. A
                translator returns a Path to override the default translation, or None
                to decline. A relative Path is appended to `cache_dir`; an absolute
                Path is used as-is (be very careful with this, since it can reach
                files outside of the cache directory). Multiple translators are tried
                in order until one returns a Path; if none does, the default applies.
                Note that `url_to_path` sees the original URL, not the URL produced by
                a `url_to_url` translator.

                If None, use the default value given when this :class:`FCPath` was
                created.

        Returns:
            The Path of the filename in the temporary directory (or the original
            absolute path if local). If `sub_path` was a list or tuple of paths, a list
            of such Paths is returned instead. If `exception_on_fail` is False, any
            Path may be an Exception when that file does not exist, the download
            failed, or a timeout occurred.

        Raises:
            FileNotFoundError: If a file does not exist or could not be downloaded, and
                exception_on_fail is True.
            TimeoutError: If we could not acquire the lock to allow downloading of a
                file within the given timeout or, for a multi-file download, if we
                timed out waiting for other processes to download locked files, and
                exception_on_fail is True.

        Notes:
            File download is normally an atomic operation; a program will never see a
            partially-downloaded file, and if a download is interrupted there will be
            no file present. However, when downloading multiple files at the same time,
            as many files as possible are downloaded before an exception is raised.
        """

        # Snapshot the cache-wide counter so this FCPath can be credited afterwards
        # with exactly the downloads performed by this call
        counter_before = self.filecache.download_counter

        thread_limit = self._validate_nthreads(nthreads)
        effective_timeout = self._lock_timeout if lock_timeout is None else lock_timeout
        target = cast(StrOrPathOrSeqType, self._make_paths_absolute(sub_path))

        # Per-call translators take precedence over the ones stored on this FCPath
        if not url_to_url:
            url_to_url = self._url_to_url
        if not url_to_path:
            url_to_path = self._url_to_path

        try:
            return self.filecache.retrieve(target,
                                           anonymous=self._anonymous,
                                           lock_timeout=effective_timeout,
                                           nthreads=thread_limit,
                                           exception_on_fail=exception_on_fail,
                                           url_to_url=url_to_url,
                                           url_to_path=url_to_path)
        finally:
            # Runs on success and on failure, so partial multi-file downloads count too
            downloaded = self.filecache.download_counter - counter_before
            self._download_counter += downloaded



[docs]
    def upload(self,
               sub_path: Optional[StrOrPathOrSeqType] = None,
               *,
               nthreads: Optional[int] = None,
               exception_on_fail: bool = True,
               url_to_url: Optional[UrlToUrlFuncOrSeqType] = None,
               url_to_path: Optional[UrlToPathFuncOrSeqType] = None
               ) -> Path | Exception | list[Path | Exception]:
        """Send file(s) from the file cache to their storage location(s).

        Parameters:
            sub_path: The path of the file relative to this path; this path itself is
                used when omitted. A list or tuple causes the complete list of files to
                be uploaded, which may be more efficient because files can be uploaded
                in parallel. A derived path that is not absolute is assumed to be a
                relative local path and is made absolute by expanding usernames and
                resolving links.
            nthreads: The maximum number of threads to use when doing multiple-file
                retrieval or upload. If None, use the default value given when this
                :class:`FileCache` was created.
            exception_on_fail: If True, an exception is raised if any file does not
                exist or an upload fails. If False, the function returns normally and
                each failed upload is reported as the Exception that caused it, in
                place of the returned path.
            url_to_url: One translator function, or a list of them, used to map URLs
                onto other URLs. Each translator is called as::

                    func(scheme: str, remote: str, path: str) -> str

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), and `path` the remainder of the URL. A translator returns a new
                complete URL as a string to override the default translation, or None
                to decline. Multiple translators are tried in order until one returns
                a URL; if none does, the default applies.

                If None, the `url_to_url` value held by this :class:`FCPath` is passed
                along instead; when that is also None the default translators of the
                associated :class:`FileCache` instance are used.
            url_to_path: One translator function, or a list of them, used to map URLs
                onto local paths. Without a translator, :class:`FileCache` lays files
                out as ``<cache_dir>/<cache_name>/<source>/<path>``, where ``source``
                is the URL prefix in a filesystem-friendly form (e.g. ``gs://bucket``
                becomes ``gs_bucket``). Each translator is called as::

                    func(scheme: str, remote: str, path: str, cache_dir: Path,
                         cache_subdir: str) -> str | Path

                with `scheme` being the URL scheme (like ``"gs"`` or ``"file"``),
                `remote` the bucket or webserver name (the empty string for a local
                file), `path` the remainder of the URL, `cache_dir` the top-level
                directory of the cache (``<cache_dir>/<cache_name>``), and
                `cache_subdir` the subdirectory specific to this scheme and remote. A
                translator returns a Path to override the default translation, or None
                to decline. A relative Path is appended to `cache_dir`; an absolute
                Path is used as-is (be very careful with this, since it can reach
                files outside of the cache directory). Multiple translators are tried
                in order until one returns a Path; if none does, the default applies.
                Note that `url_to_path` sees the original URL, not the URL produced by
                a `url_to_url` translator.

                If None, use the default value given when this :class:`FCPath` was
                created.

        Returns:
            The Path of the filename in the temporary directory (or the original
            absolute path if local). If `sub_path` was a list or tuple of paths, a list
            of such Paths is returned instead. If `exception_on_fail` is False, any
            Path may be an Exception when that file does not exist or the upload
            failed.

        Raises:
            FileNotFoundError: If a file to upload does not exist or the upload failed,
                and exception_on_fail is True.
        """

        # Snapshot the cache-wide counter so this FCPath can be credited afterwards
        # with exactly the uploads performed by this call
        counter_before = self.filecache.upload_counter

        thread_limit = self._validate_nthreads(nthreads)
        target = cast(StrOrPathOrSeqType, self._make_paths_absolute(sub_path))

        # Per-call translators take precedence over the ones stored on this FCPath
        if not url_to_url:
            url_to_url = self._url_to_url
        if not url_to_path:
            url_to_path = self._url_to_path

        try:
            return self.filecache.upload(target,
                                         anonymous=self._anonymous,
                                         nthreads=thread_limit,
                                         exception_on_fail=exception_on_fail,
                                         url_to_url=url_to_url,
                                         url_to_path=url_to_path)
        finally:
            # Runs on success and on failure, so partial multi-file uploads count too
            uploaded = self.filecache.upload_counter - counter_before
            self._upload_counter += uploaded



[docs]
    @contextlib.contextmanager
    def open(self,
             mode: str = 'r',
             *args: Any,
             url_to_url: Optional[UrlToUrlFuncOrSeqType] = None,
             url_to_path: Optional[UrlToPathFuncOrSeqType] = None,
             **kwargs: Any) -> Iterator[IO[Any]]:
        """Retrieve+open or open+upload a file as a context manager.

        If `mode` is a read mode (like ``'r'`` or ``'rb'``) then the file will be first
        retrieved by calling :meth:`retrieve` and then opened. If the `mode` is a write
        mode (like ``'w'`` or ``'wb'``) then the file will be first opened for write, and
        when this context manager is exited the file will be uploaded.

        Parameters:
            mode: The mode string as you would specify to Python's `open()` function.
            **args: Any additional arguments are passed to the Python ``open()`` function.
            url_to_url: The function (or list of functions) that is used to translate URLs
                into URLs. A user-specified translator function takes three arguments::

                    func(scheme: str, remote: str, path: str) -> str

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, and `path` is the rest of the URL. If the translator wants to
                override the default translation, it can return a new complete URL as a
                string. Otherwise, it returns None. If more than one translator is
                specified, they are called in order until one returns a URL, or it falls
                through to the default.

                If None, use the default translators for the associated :class:`FileCache`
                instance.
            url_to_path: The function (or list of functions) that is used to translate
                URLs into local paths. By default, :class:`FileCache` uses a directory
                hierarchy consisting of ``<cache_dir>/<cache_name>/<source>/<path>``,
                where ``source`` is the URL prefix converted to a filesystem-friendly
                format (e.g. ``gs://bucket`` is converted to ``gs_bucket``). A
                user-specified translator function takes five arguments::

                    func(scheme: str, remote: str, path: str, cache_dir: Path,
                         cache_subdir: str) -> str | Path

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, `path` is the rest of the URL, `cache_dir` is the top-level
                directory of the cache (``<cache_dir>/<cache_name>``), and `cache_subdir`
                is the subdirectory specific to this scheme and remote. If the translator
                wants to override the default translation, it can return a Path.
                Otherwise, it returns None. If the returned Path is relative, if will be
                appended to `cache_dir`; if it is absolute, it will be used directly (be
                very careful with this, as it has the ability to access files outside of
                the cache directory). If more than one translator is specified, they are
                called in order until one returns a Path, or it falls through to the
                default. Note that `url_to_path` operates on the original URL, not the
                URL generated by a `url_to_url` translator.

                If None, use the default value given when this :class:`FCPath` was
                created.
            **kwargs: Any additional arguments are passed to the Python ``open()``
                function.

        Returns:
            IO object: The same object as would be returned by the normal `open()`
            function.
        """

        url_to_url = url_to_url or self._url_to_url
        url_to_path = url_to_path or self._url_to_path

        if mode[0] == 'r':
            local_path = cast(Path, self.retrieve(None,
                                                  url_to_url=url_to_url,
                                                  url_to_path=url_to_path))
            with open(local_path, mode, *args, **kwargs) as fp:
                yield fp
        else:  # 'w', 'x', 'a'
            local_path = cast(Path, self.get_local_path(None,
                                                        url_to_url=url_to_url,
                                                        url_to_path=url_to_path))
            with open(local_path, mode, *args, **kwargs) as fp:
                yield fp
            self.upload(None,
                        url_to_url=url_to_url,
                        url_to_path=url_to_path)



[docs]
    def unlink(self,
               sub_path: Optional[StrOrPathOrSeqType] = None,
               *,
               missing_ok: bool = False,
               nthreads: Optional[int] = None,
               exception_on_fail: bool = True,
               url_to_url: Optional[UrlToUrlFuncOrSeqType] = None,
               url_to_path: Optional[UrlToPathFuncOrSeqType] = None
               ) -> str | Exception | list[str | Exception]:
        """Remove this file or link.

        Parameters:
            sub_path: The path of the file relative to this path. If not specified, this
                path is used. If `sub_path` is a list or tuple, the complete list of files
                is retrieved. Depending on the storage location, this may be more
                efficient because files can be downloaded in parallel. If the resulting
                derived path is not absolute, it is assumed to be a relative local path
                and is converted to an absolute path by expanding usernames and resolving
                links.
            missing_ok: True to ignore attempting to unlink a file that doesn't exist.
            nthreads: The maximum number of threads to use when doing multiple-file
                retrieval or upload. If None, use the default value given when this
                :class:`FileCache` was created.
            exception_on_fail: If True, if any file does not exist or upload fails an
                exception is raised. If False, the function returns normally and any
                failed upload is marked with the Exception that caused the failure in
                place of the returned path.
            url_to_url: The function (or list of functions) that is used to translate URLs
                into URLs. A user-specified translator function takes three arguments::

                    func(scheme: str, remote: str, path: str) -> str

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, and `path` is the rest of the URL. If the translator wants to
                override the default translation, it can return a new complete URL as a
                string. Otherwise, it returns None. If more than one translator is
                specified, they are called in order until one returns a URL, or it falls
                through to the default.

                If None, use the default translators for the associated :class:`FileCache`
                instance.
            url_to_path: The function (or list of functions) that is used to translate
                URLs into local paths. By default, :class:`FileCache` uses a directory
                hierarchy consisting of ``<cache_dir>/<cache_name>/<source>/<path>``,
                where ``source`` is the URL prefix converted to a filesystem-friendly
                format (e.g. ``gs://bucket`` is converted to ``gs_bucket``). A
                user-specified translator function takes five arguments::

                    func(scheme: str, remote: str, path: str, cache_dir: Path,
                         cache_subdir: str) -> str | Path

                where `scheme` is the URL scheme (like ``"gs"`` or ``"file"``), `remote`
                is the name of the bucket or webserver or the empty string for a local
                file, `path` is the rest of the URL, `cache_dir` is the top-level
                directory of the cache (``<cache_dir>/<cache_name>``), and `cache_subdir`
                is the subdirectory specific to this scheme and remote. If the translator
                wants to override the default translation, it can return a Path.
                Otherwise, it returns None. If the returned Path is relative, if will be
                appended to `cache_dir`; if it is absolute, it will be used directly (be
                very careful with this, as it has the ability to access files outside of
                the cache directory). If more than one translator is specified, they are
                called in order until one returns a Path, or it falls through to the
                default. Note that `url_to_path` operates on the original URL, not the
                URL generated by a `url_to_url` translator.

                If None, use the default value given when this :class:`FCPath` was
                created.

        See `pathlib.Path.unlink` for full documentation.
        """

        nthreads = self._validate_nthreads(nthreads)

        new_sub_path = self._make_paths_absolute(sub_path)

        url_to_url = url_to_url or self._url_to_url
        url_to_path = url_to_path or self._url_to_path

        return self.filecache.unlink(cast(StrOrPathOrSeqType,
                                          new_sub_path),
                                     missing_ok=missing_ok,
                                     anonymous=self._anonymous,
                                     nthreads=nthreads,
                                     exception_on_fail=exception_on_fail,
                                     url_to_url=url_to_url,
                                     url_to_path=url_to_path)


    @property
    def download_counter(self) -> int:
        """The number of actual file downloads that have taken place."""

        return self._download_counter

    @property
    def upload_counter(self) -> int:
        """The number of actual file uploads that have taken place."""

        return self._upload_counter


[docs]
    def is_local(self) -> bool:
        """True if the path refers to the local filesystem."""

        return self._path.startswith('file:///') or '://' not in self._path



[docs]
    def is_file(self) -> bool:
        """Report whether this path exists and is not a directory."""

        found = self.exists()
        if not found:
            # Nothing exists at this path; hand back the (falsy) existence result
            return cast(bool, found)
        return cast(bool, not self.is_dir())



[docs]
    def read_bytes(self, **kwargs: Any) -> bytearray:
        """Open the file in binary read mode and return its entire contents.

        The file is opened through :meth:`open` with ``mode='rb'`` and is closed when the
        read completes. Any additional keyword arguments are forwarded to :meth:`open`.
        """

        opened = self.open(mode='rb', **kwargs)
        with opened as fp:
            return cast(bytearray, fp.read())



[docs]
    def read_text(self, **kwargs: Any) -> str:
        """Open the file in text read mode and return its entire contents.

        The file is opened through :meth:`open` with ``mode='r'`` and is closed when the
        read completes. Any additional keyword arguments are forwarded to :meth:`open`.
        """

        opened = self.open(mode='r', **kwargs)
        with opened as fp:
            return cast(str, fp.read())



[docs]
    def write_bytes(self, data: Any, **kwargs: Any) -> int:
        """Open the file in bytes mode, write to it, and close and upload the file.

        Any additional arguments are passed to the Python ``open()`` function.
        """

        # type-check for the buffer interface before truncating the file
        view = memoryview(data)
        with self.open(mode='wb', **kwargs) as f:
            return f.write(view)



[docs]
    def write_text(self, data: Any, **kwargs: Any) -> int:
        """Open the file in text mode, write to it, and close and upload the file.

        Any additional arguments are passed to the Python ``open()`` function.
        """

        if not isinstance(data, str):
            raise TypeError('data must be str, not %s' %
                            data.__class__.__name__)
        with self.open(mode='w', **kwargs) as f:
            return f.write(data)



[docs]
    def iterdir(self) -> Iterator[FCPath]:
        """Yield an FCPath for each entry in the directory this path points to.

        The listing comes from the associated FileCache's ``iterdir``, using this
        object's `url_to_url` translators. Every yielded FCPath is created with
        ``copy_from=self``.
        """

        listing = self.filecache.iterdir(self._path, url_to_url=self._url_to_url)
        for entry in listing:
            yield FCPath(entry, copy_from=self)



[docs]
    def iterdir_metadata(self) -> Iterator[tuple[FCPath, dict[str, Any] | None]]:
        """Yield an (FCPath, metadata) pair for each entry in this directory.

        The listing comes from the associated FileCache's ``iterdir_metadata``, using this
        object's `url_to_url` translators.

        Yields:
            A tuple ``(path, metadata)`` for each file and sub-directory, in no particular
            order. `path` is an FCPath created with ``copy_from=self``. `metadata` is
            passed through unchanged from the FileCache; it is a dictionary with these
            keys:

                - ``is_dir``: True if the entry is a directory, False if it is a file.
                - ``mtime``: The last modification time of the file as a float.
                - ``size``: The approximate size of the file in bytes.

            `metadata` is None if the metadata could not be retrieved.
        """

        listing = self.filecache.iterdir_metadata(self._path,
                                                  url_to_url=self._url_to_url)
        for entry, info in listing:
            yield FCPath(entry, copy_from=self), info



[docs]
    def glob(self,
             pattern: str | Path | FCPath) -> Generator[FCPath]:
        """Yield every existing file and directory matching a relative pattern.

        Parameters:
            pattern: The glob pattern, relative to this path.

        Raises:
            NotImplementedError: If `pattern` is absolute.

        Notes:
            If the FCPath is local, then the normal `pathlib.Path.glob()` method is
            called. If the pattern is only `**`, this function had different behavior
            before Python 3.13 (only directories returned) and in Python 3.13 and later
            (both files and directories are returned). In contrast, when the FCPath is
            remote, we always return all files and directories. To be safe, do not use
            `**` but instead always use `**/*`.
        """

        pat = pattern if isinstance(pattern, FCPath) else FCPath(pattern)

        if pat.is_absolute():
            raise NotImplementedError('Non-relative patterns are unsupported')

        if not self.is_local():
            # The selector consumes pattern segments by popping from the end of the
            # list, so the segments are handed over in reverse order.
            segments = pat.path.split('/')
            segments.reverse()
            matcher = _StringGlobber(self, recursive=True).selector(segments)
            for found in matcher(self.path):
                yield FCPath(found, copy_from=self)
            return

        for found in self.as_pathlib().glob(pat.path):
            yield FCPath(found, copy_from=self)



[docs]
    def rglob(self,
              pattern: str | Path | FCPath) -> Generator[FCPath]:
        """Yield every existing file and directory matching a pattern, recursively.

        This is equivalent to calling :meth:`FCPath.glob()` with ``**/`` prepended to the
        pattern.

        Notes:
            If the FCPath is local, then the normal `pathlib.Path.glob()` method is
            called. If the pattern is only `**`, this function had different behavior
            before Python 3.13 (only directories returned) and in Python 3.13 and later
            (both files and directories are returned). In contrast, when the FCPath is
            remote, we always return all files and directories. To be safe, do not use
            `**` but instead always use `**/*`.
        """

        pat = pattern if isinstance(pattern, FCPath) else FCPath(pattern)
        recursive_pattern = '**' / pat
        return self.glob(recursive_pattern)



[docs]
    def walk(self,
             top_down: bool = True
             ) -> Iterator[tuple[FCPath, list[str], list[str]]]:
        """Walk the directory tree rooted at this directory.

        Parameters:
            top_down: If True, each directory is yielded before its sub-directories are
                visited; if False, it is yielded after them.

        Yields:
            A tuple ``(dirpath, dirnames, filenames)`` for each directory visited. Entries
            whose metadata is unavailable are omitted from both name lists.

        See `pathlib.Path.walk` for full documentation.
        """

        # The stack holds directories still to be visited and, for a bottom-up walk,
        # the finished result tuples that are waiting until their children are done.
        stack: list[FCPath | tuple[FCPath, list[str], list[str]]] = [self]
        while stack:
            item = stack.pop()
            if isinstance(item, tuple):
                yield item
                continue

            subdirs: list[str] = []
            files: list[str] = []
            if not top_down:
                # Pushed before the children so it is popped only after all of them
                stack.append((item, subdirs, files))

            for entry, info in item.iterdir_metadata():
                if info is None:
                    continue
                if not info['is_dir']:
                    files.append(entry.name)
                    continue
                if not top_down:
                    stack.append(entry)
                subdirs.append(entry.name)

            if top_down:
                yield item, subdirs, files
                # `subdirs` is read only after the yield, so the names pushed here
                # reflect any in-place edits the caller made to the list.
                stack.extend([item.joinpath(name) for name in reversed(subdirs)])



[docs]
    def rename(self,
               target: str | Path | FCPath) -> FCPath:
        """Rename this path to the target path.

        Both the source and target paths must be absolute, and must be in the same
        location (e.g. both local files or both in the same GS bucket). Because cloud
        platforms do not support renaming of files, this is accomplished by downloading
        the source file, uploading it with the new name, and deleting the original
        version. If the target already exists, it will be overwritten. If the downloading
        or uploading fails, the copy in the local cache is removed to eliminate ambiguity.
        If there is only a copy in the local cache and the source path does not exist on
        the remote, the rename will still succeed by uploading a copy to the target path.

        Parameters:
            target: The path to rename to.

        Returns:
            The new FCPath instance pointing to the target path.

        Raises:
            ValueError: If one path is local and the other is remote, or if the two paths
                do not share the same drive and root.
        """

        if not isinstance(target, FCPath):
            target = FCPath(target)
        target = target.as_absolute()

        src = self.as_absolute()

        if src.is_local() != target.is_local():
            raise ValueError('Unable to rename files between local and remote locations: '
                             f'{src.path!r} and {target.path!r}')

        # The location check compares only the drive and root parts; the sub-paths are
        # allowed to differ.
        drive1, root1, _ = FCPath._split_parts(src.path)
        drive2, root2, _ = FCPath._split_parts(target.path)

        if drive1 != drive2 or root1 != root2:
            raise ValueError('Unable to rename files across locations: '
                             f'{src.path!r} and {target.path!r}')

        if src.is_local():
            # Local to local - just do an OS rename and be done with it
            target.parent.mkdir(parents=True, exist_ok=True)
            src.as_pathlib().rename(target.as_pathlib())
            return target

        # Since you generally can't rename on a remote cloud location, first
        # download the file, then rename it locally, then upload it to the new name,
        # then delete the old name
        local_path = cast(Path, src.retrieve())
        target_local_path = cast(Path, target.get_local_path())
        local_path.rename(target_local_path)  # Rename in the cache
        try:
            target.upload()  # Upload the new version
        except Exception:
            # Don't leave a cached copy under the new name if it never reached the remote
            target_local_path.unlink(missing_ok=True)
            raise
        src.unlink(missing_ok=True)  # Delete the old version

        return target



[docs]
    def replace(self, target: str | Path | FCPath) -> FCPath:
        """Rename this path to the target path, overwriting if that path exists.

        This simply delegates to :meth:`rename`, so it accepts the same target types.

        Both the source and target paths must be absolute, and must be in the same
        location (e.g. both local files or both in the same GS bucket). Because cloud
        platforms do not support renaming of files, this is accomplished by downloading
        the source file, uploading it with the new name, and deleting the original
        version. If the target already exists, it will be overwritten.

        Parameters:
            target: The path to rename to.

        Returns:
            The new FCPath instance pointing to the target path.
        """

        return self.rename(target)


    if sys.version_info >= (3, 12):
        def relative_to(self,
                        other: str | Path | FCPath,
                        *,
                        walk_up: bool = False) -> FCPath:
            """Return the relative path to another path.

            See `pathlib.Path.relative_to` for full documentation.
            """

            if not isinstance(other, FCPath):
                other = FCPath(other)

            if self.is_local():
                return FCPath(self.as_pathlib().relative_to(other.as_pathlib(),
                                                            walk_up=walk_up),
                              copy_from=self)

            if walk_up:
                raise NotImplementedError(
                    'walk_up is not supported for non-local FCPaths')

            if not self._path.startswith(other._path):
                raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")

            return FCPath(self._path[len(other._path)+1:], copy_from=self)
    else:

[docs]
        def relative_to(self,
                        other: str | Path | FCPath) -> FCPath:
            """Return the relative path to another path.

            See `pathlib.Path.relative_to` for full documentation.
            """

            if not isinstance(other, FCPath):
                other = FCPath(other)

            if self.is_local():
                return FCPath(self.as_pathlib().relative_to(other.as_pathlib()),
                              copy_from=self)

            if not self._path.startswith(other._path):
                raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")

            return FCPath(self._path[len(other._path)+1:], copy_from=self)



[docs]
    def is_relative_to(self,
                       other: str | Path | FCPath) -> bool:
        """Return True if the path is relative to another path.

        See `pathlib.Path.is_relative_to` for full documentation.
        """

        if not isinstance(other, FCPath):
            other = FCPath(other)

        return self._path.startswith(other._path)



[docs]
    def is_reserved(self) -> bool:
        """True if the path contains a special reserved name.

        See `pathlib.Path.is_reserved` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('is_reserved on a remote file is not implemented')

        return self.as_pathlib().is_reserved()



[docs]
    def stat(self,
             *,
             follow_symlinks: bool = True) -> Any:
        """Return the result of the stat() system call on this path.

        Only valid for local files. See `pathlib.Path.stat` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('stat on a remote file is not implemented; use '
                                      'modification_time() if you just want that info')

        return self.as_pathlib().stat(follow_symlinks=follow_symlinks)



[docs]
    def lstat(self) -> Any:
        """Like stat(), except if the path points to a symlink, the symlink's
        status information is returned, rather than its target's.

        Only valid for local files. See `pathlib.Path.lstat` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('lstat on a remote file is not implemented')

        return self.as_pathlib().lstat()



[docs]
    def is_mount(self) -> bool:
        """Check if this path is a mount point.

        Only valid for local directories. See `pathlib.Path.is_mount` for full
        documentation.
        """

        if not self.is_local():
            raise NotImplementedError('is_mount on a remote directory is not implemented')

        return self.as_pathlib().is_mount()



[docs]
    def is_symlink(self) -> bool:
        """Whether this path is a symbolic link.

        Only valid for local files. See `pathlib.Path.is_symlink` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('is_symlink on a remote file is not implemented')

        return self.as_pathlib().is_symlink()


    if sys.version_info >= (3, 12):

[docs]
        def is_junction(self) -> bool:
            """Whether this path is a junction.

            Only valid for local files. See `pathlib.Path.is_junction` for full
            documentation.
            """

            if not self.is_local():
                raise NotImplementedError(
                    'is_junction on a remote file is not implemented')

            return self.as_pathlib().is_junction()



[docs]
    def is_block_device(self) -> bool:
        """Whether this path is a block device.

        Only valid for local files. See `pathlib.Path.is_block_device` for full
        documentation.
        """

        if not self.is_local():
            raise NotImplementedError(
                'is_block_device on a remote file is not implemented')

        return self.as_pathlib().is_block_device()



[docs]
    def is_char_device(self) -> bool:
        """Whether this path is a character device.

        Only valid for local files. See `pathlib.Path.is_char_device` for full
        documentation.
        """

        if not self.is_local():
            raise NotImplementedError(
                'is_char_device on a remote file is not implemented')

        return self.as_pathlib().is_char_device()



[docs]
    def is_fifo(self) -> bool:
        """Whether this path is a FIFO.

        Only valid for local files. See `pathlib.Path.is_fifo` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('is_fifo on a remote file is not implemented')

        return self.as_pathlib().is_fifo()



[docs]
    def is_socket(self) -> bool:
        """Whether this path is a socket.

        Only valid for local files. See `pathlib.Path.is_socket` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('is_socket on a remote file is not implemented')

        return self.as_pathlib().is_socket()



[docs]
    def samefile(self,
                 other_path: str | Path | FCPath) -> bool:
        """True if this path and the given path refer to the same file.

        Unlink the `pathlib.Path.samefile` version, this function only looks to see
        if the URLs are identical. Thus symlinks, hardlinks, etc. are ignored.
        """

        if not isinstance(other_path, FCPath):
            other_path = FCPath(other_path)

        return self._path == other_path._path



[docs]
    def absolute(self) -> FCPath:
        """Return an absolute version of this path.

        For non-local paths, this just returns the URL. For local paths, it does the same
        operations as `pathlib.Path.absolute`. See `pathlib.Path.absolute` for full
        documentation.
        """

        if not self.is_local():
            return self

        return FCPath(self.as_pathlib().absolute(), copy_from=self)



[docs]
    @classmethod
    def cwd(cls) -> FCPath:
        """Return a new FCPath for the current working directory.

        See `pathlib.Path.cwd` for full documentation.
        """

        current_dir = Path.cwd()
        return FCPath(current_dir)



[docs]
    def expanduser(self) -> FCPath:
        """Return a new FCPath with expanded ~ and ~user constructs.

        See `pathlib.Path.expanduser` for full documentation.
        """

        if self.is_local():
            return FCPath(self.as_pathlib().expanduser(), copy_from=self)

        return self



[docs]
    def expandvars(self) -> FCPath:
        """Return a new FCPath with environment variables expanded.

        The POSIX form of this path is passed through `os.path.expandvars`, and the
        result is wrapped in a new FCPath created with ``copy_from=self``. See
        `os.path.expandvars` for full documentation.
        """

        expanded = os.path.expandvars(self.as_posix())
        return FCPath(expanded, copy_from=self)



[docs]
    @classmethod
    def home(cls) -> FCPath:
        """Return a new FCPath for the result of ``os.path.expanduser('~')``.

        See `pathlib.Path.home` for full documentation.
        """

        home_dir = os.path.expanduser('~')
        return FCPath(home_dir)



[docs]
    def readlink(self) -> FCPath:
        """Return the FCPath to which the symbolic link points.

        Only valid for local files. See `pathlib.Path.readlink` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('readlink on a remote file is not implemented')

        return FCPath(self.as_pathlib().readlink())



[docs]
    def resolve(self,
                strict: bool = False) -> FCPath:
        """Return the absolute path with resolved symlinks.

        See `pathlib.Path.resolve` for full documentation.
        """

        if self.is_local():
            return FCPath(self.as_pathlib().absolute().resolve(strict=strict),
                          copy_from=self)

        return self



[docs]
    def symlink_to(self,
                   target: str,
                   target_is_directory: bool = False) -> None:
        """Make this path a symlink pointing to the target path.

        Only valid for local files. See `pathlib.Path.symlink_to` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('symlink_to on a remote file is not implemented')

        self.as_pathlib().symlink_to(target, target_is_directory=target_is_directory)



[docs]
    def hardlink_to(self,
                    target: str) -> None:
        """Make this path a hard link pointing to the same file as *target*.

        Only valid for local files. See `pathlib.Path.hardlink_to` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('hardlink_to on a remote file is not implemented')

        self.as_pathlib().hardlink_to(target)



[docs]
    def touch(self,
              mode: int = 0o666,
              exist_ok: bool = True) -> None:
        """Create this file if it doesn't exist; otherwise rewrite it in place.

        A local path is handled entirely by `pathlib.Path.touch`. For a remote path, a
        missing file is created empty, and an existing file is retrieved and then
        uploaded again.

        Parameters:
            mode: Passed to `pathlib.Path.touch` for local paths. Not used for remote
                paths.
            exist_ok: If False, raise FileExistsError when the file already exists.

        Raises:
            FileExistsError: If the file already exists and `exist_ok` is False.

        See `pathlib.Path.touch` for full documentation.
        """

        if self.is_local():
            self.as_pathlib().touch(mode=mode, exist_ok=exist_ok)
            # The local touch is complete; don't fall through to the remote handling
            return

        if not self.exists():
            self.write_bytes(b'')
            return

        if not exist_ok:
            # Honor the same contract as pathlib.Path.touch(exist_ok=False)
            raise FileExistsError(f'File exists: {self._path!r}')

        # Read and write the file back so that the remote copy is rewritten
        # (intended to refresh its timestamp)
        self.retrieve()
        self.upload()



[docs]
    def mkdir(self,
              mode: int = 0o777,
              parents: bool = False,
              exist_ok: bool = False) -> None:
        """Create a new directory at this given path.

        Only valid for local directories. See `pathlib.Path.mkdir` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('mkdir on a remote directory is not implemented')

        self.as_pathlib().mkdir(mode=mode, parents=parents, exist_ok=exist_ok)



[docs]
    def chmod(self,
              mode: int,
              *,
              follow_symlinks: bool = True) -> None:
        """Change the permissions of the path, like os.chmod().

        Only valid for local files. See `pathlib.Path.chmod` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('chmod on a remote file is not implemented')

        self.as_pathlib().chmod(mode=mode, follow_symlinks=follow_symlinks)



[docs]
    def lchmod(self,
               mode: int) -> None:
        """Like chmod(), except if the path points to a symlink, the symlink's
        permissions are changed, rather than its target's.

        Only valid for local files. See `pathlib.Path.lchmod` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('lchmod on a remote file is not implemented')

        self.as_pathlib().lchmod(mode=mode)



[docs]
    def rmdir(self) -> None:
        """Remove this directory. The directory must be empty.

        Only valid for local directories. See `pathlib.Path.rmdir` for full documentation.
        """

        if not self.is_local():
            raise NotImplementedError('rmdir on a remote directory is not implemented')

        self.as_pathlib().rmdir()


    if sys.version_info >= (3, 13):
        def owner(self, *,
                  follow_symlinks: bool = True) -> str:
            """Return the login name of the file owner.

            Only valid for local files. See `pathlib.Path.owner` for full documentation.
            """

            if not self.is_local():
                raise NotImplementedError('owner on a remote file is not implemented')

            return self.as_pathlib().owner(follow_symlinks=follow_symlinks)

        def group(self, *,
                  follow_symlinks: bool = True) -> str:
            """Return the group name of the file gid.

            Only valid for local files. See `pathlib.Path.group` for full documentation.
            """

            if not self.is_local():
                raise NotImplementedError('group on a remote file is not implemented')

            return self.as_pathlib().group(follow_symlinks=follow_symlinks)
    else:

[docs]
        def owner(self) -> str:
            """Return the login name of the file owner.

            Only valid for local files. See `pathlib.Path.owner` for full documentation.
            """

            if not self.is_local():
                raise NotImplementedError('owner on a remote file is not implemented')

            return self.as_pathlib().owner()



[docs]
        def group(self) -> str:
            """Return the group name of the file gid.

            Only valid for local files. See `pathlib.Path.group` for full documentation.
            """

            if not self.is_local():
                raise NotImplementedError('group on a remote file is not implemented')

            return self.as_pathlib().group()



[docs]
    @classmethod
    def from_uri(cls,
                 uri: str) -> FCPath:
        """Build a new FCPath directly from the given URI string."""

        new_path = FCPath(uri)
        return new_path



[docs]
    def as_uri(self) -> str:
        """Return this path expressed as a URI.

        A remote path, or a path already starting with ``file://``, is returned
        unchanged. Any other local path gets a ``file:`` prefix whose form depends on the
        drive part of the path.

        Raises:
            ValueError: If the path is not absolute.
        """

        if not self.is_absolute():
            raise ValueError("relative path can't be expressed as a file URI")

        if not self.is_local():
            return self._path
        if self.path.startswith('file://'):
            return self._path

        drive, _, _ = FCPath._split_parts(self._path)
        if not drive:
            # It's a posix path => 'file:///etc/hosts'
            return f'file://{self._path}'
        if len(drive) == 2:
            # It's a path on a local drive => 'file:///c:/a/b'
            return f'file:///{self._path}'
        # It's a path on a network drive => 'file://host/share/a/b'
        return f'file:{self._path}'




def _translate2(pat: str,
                STAR: str,
                QUESTION_MARK: str) -> list[str]:
    """Translate glob pattern text into a list of regular-expression fragments.

    Each run of consecutive ``*`` becomes a single `STAR` fragment, each ``?`` becomes
    `QUESTION_MARK`, each ``[...]`` bracket expression becomes a regex character class,
    and every other character is escaped with `re.escape`.

    Parameters:
        pat: The glob pattern text to translate.
        STAR: The regex fragment to emit for a run of ``*`` characters.
        QUESTION_MARK: The regex fragment to emit for each ``?`` character.

    Returns:
        The regex fragments in pattern order. They are not joined here; `_translate`
        concatenates them into the final expression.
    """
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            # compress consecutive `*` into one (identity check against the exact
            # STAR object that was appended last)
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == '?':
            add(QUESTION_MARK)
        elif c == '[':
            # Scan forward for the closing ']'. A leading '!' and a ']' that comes
            # directly after it (or directly after the '[') are stepped over first,
            # so they count as set contents rather than as the terminator.
            j = i
            if j < n and pat[j] == '!':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                # No closing ']' was found: emit the '[' as a literal character
                add('\\[')
            else:
                stuff = pat[i:j]
                if '-' not in stuff:
                    # No ranges present, so only backslashes need escaping
                    stuff = stuff.replace('\\', r'\\')
                else:
                    # Cut the set contents into chunks at each '-' found below. The
                    # search starts past the first set character (and past a leading
                    # '!'), so a hyphen in that position is never used as a cut point.
                    chunks = []
                    k = i+2 if pat[i] == '!' else i+1
                    while True:
                        k = pat.find('-', k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k+1
                        # Resume the search three positions past this hyphen
                        k = k+3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # The set ended right after a hyphen; keep that hyphen on the
                        # final chunk
                        chunks[-1] += '-'
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks)-1, 0, -1):
                        if chunks[k-1][-1] > chunks[k][0]:
                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                     for s in chunks)
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                # Continue scanning just past the closing ']'
                i = j+1
                if not stuff:
                    # Empty range: never match.
                    add('(?!)')
                elif stuff == '!':
                    # Negated empty range: match any character.
                    add('.')
                else:
                    if stuff[0] == '!':
                        # Glob negation '!' is written as '^' in a regex class
                        stuff = '^' + stuff[1:]
                    elif stuff[0] in ('^', '['):
                        # Escape a leading '^' or '[' so it is matched literally
                        stuff = '\\' + stuff
                    add(f'[{stuff}]')
        else:
            add(re.escape(c))
    # Sanity check: the whole pattern must have been consumed
    assert i == n
    return res


# Compiled patterns that match a single glob wildcard character (``*``, ``?`` or ``[``);
# one is compiled from a str pattern and the other from a bytes pattern.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def _translate(pat: str,
               *,
               recursive: bool = False) -> str:
    """Translate a pathname with shell wildcards to a regular expression.

    If `recursive` is true, the pattern segment '**' will match any number of
    path segments.
    """
    not_sep = '[^/]'
    one_last_segment = f'[^/.]{not_sep}*'
    one_segment = f'{one_last_segment}/'
    any_segments = f'(?:{one_segment})*'
    any_last_segments = f'{any_segments}(?:{one_last_segment})?'

    results = []
    parts = re.split('/', pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == '*':
            results.append(one_segment if idx < last_part_idx else one_last_segment)
        elif recursive and part == '**':
            if idx < last_part_idx:
                if parts[idx + 1] != '**':
                    results.append(any_segments)
            else:
                results.append(any_last_segments)
        else:
            if part:
                if part[0] in '*?':
                    results.append(r'(?!\.)')
                results.extend(_translate2(part, f'{not_sep}*', not_sep))
            if idx < last_part_idx:
                results.append('/')
    res = ''.join(results)
    return fr'(?s:{res})\Z'


@functools.lru_cache(maxsize=512)
def _compile_pattern(pat: str,
                     recursive: bool = True) -> Any:
    """Return a matcher function for the given glob pattern.

    The pattern is converted to a regular expression with `_translate` and then
    compiled. Results are memoized (up to 512 entries) keyed on the call arguments.

    Parameters:
        pat: The glob pattern to compile.
        recursive: Passed through to `_translate`; controls whether '**' is
            treated as a multi-segment wildcard.

    Returns:
        The bound ``match`` method of the compiled regular expression.
    """
    compiled = re.compile(_translate(pat, recursive=recursive))
    return compiled.match


class _StringGlobber:
    """Class providing shell-style pattern matching and globbing.
    """

    def __init__(self,
                 copy_from: FCPath,
                 recursive: bool = False) -> None:
        self.recursive = recursive
        self.copy_from = copy_from

    # High-level methods

    def compile(self,
                pat: str) -> Any:
        """Return a matcher function for the glob pattern `pat`.

        Delegates to the module-level `_compile_pattern`, passing this globber's
        `recursive` setting.

        Parameters:
            pat: The glob pattern to compile.

        Returns:
            Whatever `_compile_pattern` returns for this pattern.
        """
        # Positional call on purpose: keeps the same lru_cache key shape.
        matcher = _compile_pattern(pat, self.recursive)
        return matcher

    def selector(self,
                 parts: list[str]) -> Any:
        """Returns a function that selects from a given path, walking and
        filtering according to the glob-style pattern parts in *parts*.
        """
        if not parts:
            return self.select_exists
        part = parts.pop()
        if self.recursive and part == '**':
            selector = self.recursive_selector
        else:
            selector = self.wildcard_selector
        return selector(part, parts)

    def wildcard_selector(self,
                          part: str,
                          parts: list[str]) -> Callable[[str, bool], Generator[FCPath]]:
        """Returns a function that selects direct children of a given path,
        filtering by pattern.
        """

        match = None if part == '*' else self.compile(part)
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path: str,
                            exists: bool = False) -> Generator[FCPath]:
            entries = list(FCPath(path, copy_from=self.copy_from).iterdir_metadata())
            for entry, metadata in entries:
                if metadata is None:
                    continue
                if match is None or match(entry.name):
                    if dir_only and not metadata['is_dir']:
                        continue
                    if dir_only:
                        yield from select_next(entry, exists=True)
                    else:
                        yield entry
        return select_wildcard

    def recursive_selector(self,
                           part: str,
                           parts: list[str]) -> Callable[[str, bool], Generator[FCPath]]:
        """Returns a function that selects a given path and all its children,
        recursively, filtering by pattern.
        """
        # Optimization: consume following '**' parts, which have no effect.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: consume and join any following non-special parts here,
        # rather than leaving them for the next selector. They're used to
        # build a regular expression, which we use to filter the results of
        # the recursive walk. As a result, non-special pattern segments
        # following a '**' wildcard don't require additional filesystem access
        # to expand.
        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path: str,
                             exists: bool = False) -> Generator[FCPath]:
            path_str = str(path)
            if path_str and path_str[-1] != '/':
                path_str = f'{path_str}/'
            match_pos = len(path_str)
            if match is None or match(path_str, match_pos):
                yield from select_next(path_str, exists)
            stack = [path_str]
            while stack:
                yield from select_recursive_step(stack, match_pos)

        def select_recursive_step(stack: list[str],
                                  match_pos: int) -> Generator[Any]:
            path = stack.pop()
            entries = list(FCPath(path, copy_from=self.copy_from).iterdir_metadata())
            for entry, metadata in entries:
                if metadata is None:
                    continue
                if metadata['is_dir'] or not dir_only:
                    if match is None or match(str(entry), match_pos):
                        if dir_only:
                            yield from select_next(entry, exists=True)
                        else:
                            # Optimization: directly yield the path if this is
                            # last pattern part.
                            yield entry
                    if metadata['is_dir']:
                        stack.append(entry.path)

        return select_recursive

    def select_exists(self,
                      path: str,
                      exists: bool = False) -> Generator[str]:
        """Yields the given path, if it exists.
        """
        if exists:
            # Optimization: this path is already known to exist, e.g. because
            # it was returned from os.iterdir(), so we skip calling exists().
            yield path
        else:
            if FCPath(path, copy_from=self.copy_from).exists():
                yield path