# Source code for sbg.cwl.v1_0.types

from sbg.cwl.v1_0.base import Cwl, salad
from sbg.cwl.v1_0.check import to_str, to_int
from sbg.cwl.v1_0.util import is_instance_all


@salad
def to_file_dir_list(value):
    """
    Normalizes ``value`` into a list of ``File``/``Directory`` objects.

    * ``None`` is passed through as ``None``.
    * A value for which ``is_instance_all(value, File, Directory)`` holds
      is returned unchanged.
    * A value for which ``is_instance_all(value, dict)`` holds is converted
      item by item, choosing the target class from each dict's ``'class'``
      entry.

    :param value: ``None``, or a collection of ``File``/``Directory``
        objects, or a collection of dicts
    :return: ``None``, the original ``value``, or a new list of objects
    :raises ValueError: a dict's ``'class'`` is neither ``'Directory'``
        nor ``'File'``
    :raises TypeError: ``value`` matches none of the accepted shapes
    """
    @salad
    def map_dict(d):
        # Build the object whose class name matches the dict's 'class' key.
        if d['class'] == 'Directory':
            return Directory(**d)
        if d['class'] == 'File':
            return File(**d)
        raise ValueError('Unsupported class')

    if value is None:
        return None

    # Already the right kind of objects: hand the same value back.
    if is_instance_all(value, File, Directory):
        return value

    if not is_instance_all(value, dict):
        raise TypeError(
            'Expected list[File|Directory], got: {}'.format(type(value))
        )

    return [map_dict(item) for item in value]


class Primitive(object):
    """
    Names of the CWL v1.0 primitive types.

    Every attribute is the plain string used to spell that type in a CWL
    document.
    """

    # no value
    NULL = 'null'
    # a binary value
    BOOLEAN = 'boolean'
    # 32-bit signed integer
    INT = 'int'
    # 64-bit signed integer
    LONG = 'long'
    # single precision (32-bit) IEEE 754 floating-point number
    FLOAT = 'float'
    # double precision (64-bit) IEEE 754 floating-point number
    DOUBLE = 'double'
    # Unicode character sequence
    STRING = 'string'
    # A File object
    FILE = 'File'
    # A Directory object
    DIRECTORY = 'Directory'
    # The CWL "Any" type name
    ANY = 'Any'
def is_primitive(t):
    """
    Checks by CWL v1.0 specification if object ``t`` is primitive type.

    ``t`` is accepted when it is equal to one of the following, for any
    primitive type name ``<name>``:

    * the bare name, e.g. ``'int'``;
    * the optional shorthand ``'<name>?'``, e.g. ``'int?'``;
    * the two-element list ``['null', '<name>']``, in exactly that order.

    :param t: type description to test
    :return: ``True`` if ``t`` matches one of the forms above, otherwise
        ``False``
    """
    primitives = [
        Primitive.BOOLEAN,
        Primitive.INT,
        Primitive.LONG,
        Primitive.FLOAT,
        Primitive.DOUBLE,
        Primitive.STRING,
        Primitive.FILE,
        Primitive.DIRECTORY,
        Primitive.NULL,
        Primitive.ANY
    ]

    # Optional ("non-required") spellings of every primitive.
    non_req = []
    for p in primitives:
        non_req += ["{}?".format(p), [Primitive.NULL, p]]

    # List membership compares with ``==``, so a list-valued ``t`` is fine.
    return t in primitives + non_req
def is_number(t):
    """
    Checks by CWL v1.0 specification if object ``t`` is number type.

    :param t: type description to test
    :return: ``True`` if ``t`` equals ``'int'``, ``'long'``, ``'float'`` or
        ``'double'``, otherwise ``False``
    """
    return t in (
        Primitive.INT,
        Primitive.LONG,
        Primitive.FLOAT,
        Primitive.DOUBLE
    )
class File(Cwl):
    """
    Represents a file (or group of files when secondaryFiles is provided)
    that will be accessible by tools using standard POSIX file system call
    API such as open(2) and read(2).

    Files are represented as objects with class of File. File objects have
    a number of properties that provide metadata about the file.

    The location property of a File is a URI that uniquely identifies the
    file. Implementations must support the file:// URI scheme and may
    support other schemes such as http://. The value of location may also
    be a relative reference, in which case it must be resolved relative to
    the URI of the document it appears in. Alternately to location,
    implementations must also accept the path property on File, which must
    be a filesystem path available on the same host as the CWL runner (for
    inputs) or the runtime environment of a command line tool execution
    (for command line tool outputs).

    If no location or path is specified, a file object must specify
    contents with the UTF-8 text content of the file. This is a
    "file literal". File literals do not correspond to external resources,
    but are created on disk with contents when needed for executing a
    tool. Where appropriate, expressions can return file literals to define
    new files on a runtime. The maximum size of contents is 64 kilobytes.

    The basename property defines the filename on disk where the file is
    staged. This may differ from the resource name. If not provided,
    basename must be computed from the last path part of location and made
    available to expressions.

    The secondaryFiles property is a list of File or Directory objects that
    must be staged in the same directory as the primary file. It is an
    error for file names to be duplicated in secondaryFiles.

    The size property is the size in bytes of the File. It must be computed
    from the resource and made available to expressions. The checksum field
    contains a cryptographic hash of the file content for use in verifying
    file contents. Implementations may, at user option, enable or disable
    computation of the checksum field for performance or other reasons.
    However, the ability to compute output checksums is required to pass
    the CWL conformance test suite.

    When executing a CommandLineTool, the files and secondary files may be
    staged to an arbitrary directory, but must use the value of basename
    for the filename. The path property must be a file path in the context
    of the tool execution runtime (local to the compute node, or within the
    executing container). All computed properties should be available to
    expressions. File literals also must be staged and path must be set.

    When collecting CommandLineTool outputs, glob matching returns file
    paths (with the path property) and the derived properties. This can all
    be modified by outputEval. Alternately, if the file cwl.outputs.json is
    present in the output, outputBinding is ignored.

    File objects in the output must provide either a location URI or a path
    property in the context of the tool execution runtime (local to the
    compute node, or within the executing container).

    When evaluating an ExpressionTool, file objects must be referenced via
    location (the expression tool does not have access to files on disk so
    path is meaningless) or as file literals. It is legal to return a file
    object with an existing location but a different basename. The
    loadContents field of ExpressionTool inputs behaves the same as on
    CommandLineTool inputs, however it is not meaningful on the outputs.

    An ExpressionTool may forward file references from input to output by
    using the same value for location.
    """
    class_ = 'File'

    def __init__(self, location=None, path=None, basename=None,
                 dirname=None, nameroot=None, nameext=None, checksum=None,
                 size=None, secondary_files=None, format=None,
                 contents=None):
        """
        Sets the ``'class'`` entry and stores every argument through the
        property setter of the same name.
        """
        super(File, self).__init__()
        self['class'] = self.class_
        self.location = location
        self.path = path
        self.basename = basename
        self.dirname = dirname
        self.nameroot = nameroot
        self.nameext = nameext
        self.checksum = checksum
        self.size = size
        self.secondary_files = secondary_files
        self.format = format
        self.contents = contents

    def add_secondary_file(self, file):
        """
        Appends ``file`` to the secondary files, first assigning an empty
        list when none are set. The appended item itself is not passed
        through the ``secondary_files`` setter.
        """
        if not self.secondary_files:
            self.secondary_files = []
        self.secondary_files.append(file)

    @property
    def location(self):
        """
        An IRI that identifies the file resource. This may be a relative
        reference, in which case it must be resolved using the base IRI of
        the document. The location may refer to a local or remote resource;
        the implementation must use the IRI to retrieve file content. If an
        implementation is unable to retrieve the file content stored at a
        remote resource (due to unsupported protocol, access denied, or
        other issue) it must signal an error.

        If the location field is not provided, the contents field must be
        provided. The implementation must assign a unique identifier for
        the location field.

        If the path field is provided but the location field is not, an
        implementation may assign the value of the path field to location,
        then follow the rules above.
        """
        return self.get('location')

    @location.setter
    def location(self, value):
        self['location'] = to_str(value)

    @property
    def path(self):
        """
        The local host path where the File is available when a
        CommandLineTool is executed. This field must be set by the
        implementation. The final path component must match the value of
        basename. This field must not be used in any other context. The
        command line tool being executed must be able to access the file at
        path using the POSIX open(2) syscall.

        As a special case, if the path field is provided but the location
        field is not, an implementation may assign the value of the path
        field to location, and remove the path field.
        """
        return self.get('path')

    @path.setter
    def path(self, value):
        self['path'] = to_str(value)

    @property
    def basename(self):
        """
        The base name of the file, that is, the name of the file without
        any leading directory path. The base name must not contain a
        slash /.

        If not provided, the implementation must set this field based on
        the location field by taking the final path component after parsing
        location as an IRI. If basename is provided, it is not required to
        match the value from location.

        When this file is made available to a CommandLineTool, it must be
        named with basename, i.e. the final component of the path field
        must match basename.
        """
        return self.get('basename')

    @basename.setter
    def basename(self, value):
        self['basename'] = to_str(value)

    @property
    def dirname(self):
        """
        The name of the directory containing file, that is, the path
        leading up to the final slash in the path such that
        dirname + '/' + basename == path.

        The implementation must set this field based on the value of path
        prior to evaluating parameter references or expressions in a
        CommandLineTool document. This field must not be used in any other
        context.
        """
        return self.get('dirname')

    @dirname.setter
    def dirname(self, value):
        self['dirname'] = to_str(value)

    @property
    def nameroot(self):
        """
        The basename root such that nameroot + nameext == basename, and
        nameext is empty or begins with a period and contains at most one
        period. For the purposes of path splitting leading periods on the
        basename are ignored; a basename of .cshrc will have a nameroot of
        .cshrc.

        The implementation must set this field automatically based on the
        value of basename prior to evaluating parameter references or
        expressions.
        """
        return self.get('nameroot')

    @nameroot.setter
    def nameroot(self, value):
        self['nameroot'] = to_str(value)

    @property
    def nameext(self):
        """
        The basename extension such that nameroot + nameext == basename,
        and nameext is empty or begins with a period and contains at most
        one period. Leading periods on the basename are ignored; a basename
        of .cshrc will have an empty nameext.

        The implementation must set this field automatically based on the
        value of basename prior to evaluating parameter references or
        expressions.
        """
        return self.get('nameext')

    @nameext.setter
    def nameext(self, value):
        self['nameext'] = to_str(value)

    @property
    def checksum(self):
        """
        Optional hash code for validating file integrity. Currently must be
        in the form "sha1$ + hexadecimal string" using the SHA-1 algorithm.
        """
        return self.get('checksum')

    @checksum.setter
    def checksum(self, value):
        self['checksum'] = to_str(value)

    @property
    def size(self):
        """
        Optional file size.
        """
        return self.get('size')

    @size.setter
    def size(self, value):
        self['size'] = to_int(value)

    @property
    def secondary_files(self):
        """
        A list of additional files or directories that are associated with
        the primary file and must be transferred alongside the primary
        file. Examples include indexes of the primary file, or external
        references which must be included when loading primary document. A
        file object listed in secondaryFiles may itself include
        secondaryFiles for which the same rules apply.
        """
        return self.get('secondaryFiles')

    @secondary_files.setter
    def secondary_files(self, value):
        self['secondaryFiles'] = to_file_dir_list(value)

    @property
    def format(self):
        """
        The format of the file: this must be an IRI of a concept node that
        represents the file format, preferably defined within an ontology.
        If no ontology is available, file formats may be tested by exact
        match.

        Reasoning about format compatibility must be done by checking that
        an input file format is the same, owl:equivalentClass or
        rdfs:subClassOf the format required by the input parameter.
        owl:equivalentClass is transitive with rdfs:subClassOf, e.g. if
        <B> owl:equivalentClass <C> and <B> owl:subclassOf <A> then infer
        <C> owl:subclassOf <A>.

        File format ontologies may be provided in the "$schema" metadata at
        the root of the document. If no ontologies are specified in
        $schema, the runtime may perform exact file format matches.
        """
        return self.get('format')

    @format.setter
    def format(self, value):
        self['format'] = to_str(value)

    @property
    def contents(self):
        """
        File contents literal. Maximum of 64 KiB.

        If neither location nor path is provided, contents must be
        non-null. The implementation must assign a unique identifier for
        the location field. When the file is staged as input to
        CommandLineTool, the value of contents must be written to a file.

        If loadContents of inputBinding or outputBinding is true and
        location is valid, the implementation must read up to the first
        64 KiB of text from the file and place it in the "contents" field.
        """
        return self.get('contents')

    @contents.setter
    def contents(self, value):
        self['contents'] = to_str(value)


class Directory(Cwl):
    """
    Represents a directory to present to a command line tool.

    Directories are represented as objects with class of Directory.
    Directory objects have a number of properties that provide metadata
    about the directory.

    The location property of a Directory is a URI that uniquely identifies
    the directory. Implementations must support the file:// URI scheme and
    may support other schemes such as http://. Alternately to location,
    implementations must also accept the path property on Directory, which
    must be a filesystem path available on the same host as the CWL runner
    (for inputs) or the runtime environment of a command line tool
    execution (for command line tool outputs).

    A Directory object may have a listing field. This is a list of File and
    Directory objects that are contained in the Directory. For each entry
    in listing, the basename property defines the name of the File or
    Subdirectory when staged to disk. If listing is not provided, the
    implementation must have some way of fetching the Directory listing at
    runtime based on the location field.

    If a Directory does not have location, it is a Directory literal. A
    Directory literal must provide listing. Directory literals must be
    created on disk at runtime as needed.

    The resources in a Directory literal do not need to have any implied
    relationship in their location. For example, a Directory listing may
    contain two files located on different hosts. It is the responsibility
    of the runtime to ensure that those files are staged to disk
    appropriately. Secondary files associated with files in listing must
    also be staged to the same Directory.

    When executing a CommandLineTool, Directories must be recursively
    staged first and have local values of path assigned.

    Directory objects in CommandLineTool output must provide either a
    location URI or a path property in the context of the tool execution
    runtime (local to the compute node, or within the executing container).

    An ExpressionTool may forward file references from input to output by
    using the same value for location.

    Name conflicts (the same basename appearing multiple times in listing
    or in any entry in secondaryFiles in the listing) is a fatal error.
    """
    class_ = 'Directory'

    def __init__(self, location=None, path=None, basename=None,
                 listing=None):
        """
        Sets the ``'class'`` entry and stores every argument through the
        property setter of the same name.
        """
        super(Directory, self).__init__()
        self['class'] = self.class_
        self.location = location
        self.path = path
        self.basename = basename
        self.listing = listing

    def add_listing(self, i):
        """
        Appends ``i`` to the listing, first assigning an empty list when
        none is set. The appended item itself is not passed through the
        ``listing`` setter.
        """
        if not self.listing:
            self.listing = []
        self.listing.append(i)

    @property
    def location(self):
        """
        An IRI that identifies the directory resource. This may be a
        relative reference, in which case it must be resolved using the
        base IRI of the document. The location may refer to a local or
        remote resource. If the listing field is not set, the
        implementation must use the location IRI to retrieve directory
        listing. If an implementation is unable to retrieve the directory
        listing stored at a remote resource (due to unsupported protocol,
        access denied, or other issue) it must signal an error.

        If the location field is not provided, the listing field must be
        provided. The implementation must assign a unique identifier for
        the location field.

        If the path field is provided but the location field is not, an
        implementation may assign the value of the path field to location,
        then follow the rules above.
        """
        return self.get('location')

    @location.setter
    def location(self, value):
        self['location'] = to_str(value)

    @property
    def path(self):
        """
        The local path where the Directory is made available prior to
        executing a CommandLineTool. This must be set by the
        implementation. This field must not be used in any other context.
        The command line tool being executed must be able to access the
        directory at path using the POSIX opendir(2) syscall.
        """
        return self.get('path')

    @path.setter
    def path(self, value):
        self['path'] = to_str(value)

    @property
    def basename(self):
        """
        The base name of the directory, that is, the name of the file
        without any leading directory path. The base name must not contain
        a slash /.

        If not provided, the implementation must set this field based on
        the location field by taking the final path component after parsing
        location as an IRI. If basename is provided, it is not required to
        match the value from location.

        When this file is made available to a CommandLineTool, it must be
        named with basename, i.e. the final component of the path field
        must match basename.
        """
        return self.get('basename')

    @basename.setter
    def basename(self, value):
        self['basename'] = to_str(value)

    @property
    def listing(self):
        """
        List of files or subdirectories contained in this directory. The
        name of each file or subdirectory is determined by the basename
        field of each File or Directory object. It is an error if a File
        shares a basename with any other entry in listing. If two or more
        Directory objects share the same basename, this must be treated as
        equivalent to a single subdirectory with the listings recursively
        merged.
        """
        return self.get('listing')

    @listing.setter
    def listing(self, value):
        self['listing'] = to_file_dir_list(value)