# Source code for harvester.resource

from abc import abstractmethod, ABC
from typing import Iterator

from vs_common.filesystem import FilesystemConfig, get_filesystem


class Resource(ABC):
    """
    Represents an online resource, such as an endpoint (API...) or a data
    source (S3/swift...), that provides data or metadata.

    This is an abstract base class: :meth:`harvest` must be implemented by a
    subclass before that subclass can be instantiated.
    """

    @abstractmethod
    def harvest(self) -> Iterator[dict]:
        """
        Starts the harvesting of the resource.

        :returns: an iterator of the harvested items
        """
        ...


class Endpoint(Resource):
    """
    Endpoints are resources that use a search protocol (or something similar)
    to harvest items. Thus, they are always associated with a specific URL.

    ``harvest`` is not implemented here, so concrete subclasses have to
    provide it.
    """

    def __init__(self, url: str) -> None:
        """
        :param url: the URL this endpoint is associated with
        """
        # Stored as given; no validation or normalization happens here.
        self.url = url


class FileScheme(Resource):
    """
    FileSchemes are resources that operate on a file basis on a given file
    source.

    ``harvest`` is not implemented here, so concrete subclasses have to
    provide it.
    """

    def __init__(
        self,
        filesystem_config: FilesystemConfig,
        root_path: str,
    ) -> None:
        """
        :param filesystem_config: configuration that is passed to
            ``get_filesystem`` to obtain the filesystem object
        :param root_path: root path for this scheme; stored unchanged
            (presumably a path inside the configured filesystem -- confirm
            with the concrete subclasses)
        """
        self.filesystem_config = filesystem_config
        # The filesystem object is built eagerly, once per instance.
        self.filesystem = get_filesystem(filesystem_config)
        self.root_path = root_path