Source code for rez.serialise

# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Rez Project


"""
Read and write data from file. File caching via a memcached server is supported.
"""
from contextlib import contextmanager
from inspect import isfunction, ismodule
import sys
import stat
import os
import os.path
import threading

from rez.package_resources import package_rex_keys
from rez.utils.scope import ScopeContext
from rez.utils.sourcecode import SourceCode, early, late, include
from rez.utils.filesystem import TempDirs
from rez.utils.data_utils import ModifyList
from rez.exceptions import ResourceError, InvalidPackageError
from rez.utils.memcached import memcached
from rez.utils.execution import add_sys_paths
from rez.utils import py23
from rez.config import config
from rez.vendor.atomicwrites import atomic_write
from rez.vendor.enum import Enum
from rez.vendor.six.six.moves import StringIO
from rez.vendor.six.six import PY3
from rez.vendor import yaml


tmpdir_manager = TempDirs(config.tmpdir, prefix="rez_write_")
debug_print = config.debug_printer("file_loads")
file_cache = {}


[docs]class FileFormat(Enum):
    py = ("py",)
    yaml = ("yaml",)
    txt = ("txt",)

    __order__ = "py,yaml,txt"

    def __init__(self, extension):
        self.extension = extension


[docs]@contextmanager
def open_file_for_write(filepath, mode=None):
    """Writes both to given filepath, and tmpdir location.

    This is to get around the problem with some NFS's where immediately reading
    a file that has just been written is problematic. Instead, any files that we
    write, we also write to /tmp, and reads of these files are redirected there.

    Args:
        filepath (str): File to write.
        mode (int): Same mode arg as you would pass to `os.chmod`.

    Yields:
        File-like object.
    """
    stream = StringIO()
    yield stream
    content = stream.getvalue()

    filepath = os.path.realpath(filepath)
    tmpdir = tmpdir_manager.mkdtemp()
    cache_filepath = os.path.join(tmpdir, os.path.basename(filepath))
    encoding = {"encoding": "utf-8"} if PY3 else {}

    debug_print("Writing to %s (local cache of %s)", cache_filepath, filepath)

    # Attempt to make file writable if it isn't already. Just fallthrough
    # if this fails, we'll get the error we expect on write anyway
    #
    try:
        if os.path.exists(filepath):
            orig_mode = os.stat(filepath).st_mode
            os.chmod(filepath, orig_mode | stat.S_IWUSR)
    except:
        pass

    # try atomic write, but that can sometimes fail.
    # https://github.com/nerdvegas/rez/issues/858
    #
    written = False
    try:
        with atomic_write(filepath, overwrite=True, **encoding) as f:
            f.write(content)
        written = True
    except:
        pass

    # fallback to standard write
    if not written:
        with open(filepath, 'w', **encoding) as f:
            f.write(content)

    if mode is not None:
        os.chmod(filepath, mode)

    # write the local fs cache copy
    with open(cache_filepath, 'w', **encoding) as f:
        f.write(content)

    file_cache[filepath] = cache_filepath


[docs]def load_from_file(filepath, format_=FileFormat.py, update_data_callback=None,
                   disable_memcache=False):
    """Load data from a file.

    Note:
        Any functions from a .py file will be converted to `SourceCode` objects.

    Args:
        filepath (str): File to load.
        format_ (`FileFormat`): Format of file contents.
        update_data_callback (callable): Used to change data before it is
            returned or cached.
        disable_memcache (bool): If True, don't r/w to memcache.

    Returns:
        dict.
    """
    filepath = os.path.realpath(filepath)
    cache_filepath = file_cache.get(filepath)

    if cache_filepath:
        # file has been written by this process, read it from /tmp to avoid
        # potential write-then-read issues over NFS
        return _load_file(filepath=cache_filepath,
                          format_=format_,
                          update_data_callback=update_data_callback,
                          original_filepath=filepath)
    elif disable_memcache:
        return _load_file(filepath=filepath,
                          format_=format_,
                          update_data_callback=update_data_callback)
    else:
        return _load_from_file(filepath=filepath,
                               format_=format_,
                               update_data_callback=update_data_callback)


def _load_from_file__key(filepath, format_, update_data_callback):
    st = os.stat(filepath)
    if update_data_callback is None:
        callback_key = 'None'
    else:
        callback_key = getattr(update_data_callback, "__name__", "None")

    return str(("package_file", filepath, str(format_), callback_key,
                int(st.st_ino), st.st_mtime))


@memcached(servers=config.memcached_uri if config.cache_package_files else None,
           min_compress_len=config.memcached_package_file_min_compress_len,
           key=_load_from_file__key,
           debug=config.debug_memcache)
def _load_from_file(filepath, format_, update_data_callback):
    return _load_file(filepath, format_, update_data_callback)


def _load_file(filepath, format_, update_data_callback, original_filepath=None):
    load_func = load_functions[format_]

    if debug_print:
        if original_filepath:
            debug_print("Loading file: %s (local cache of %s)",
                        filepath, original_filepath)
        else:
            debug_print("Loading file: %s", filepath)

    with open(filepath) as f:
        result = load_func(f, filepath=filepath)

    if update_data_callback:
        result = update_data_callback(format_, result)
    return result


_set_objects = threading.local()


# Default variables to avoid not-defined errors in early-bound attribs
default_objects = {
    "building": False,
    "build_variant_index": 0,
    "build_variant_requires": []
}


[docs]def get_objects():
    """Get currently bound variables for evaluation of early-bound attribs.

    Returns:
        dict.
    """
    result = default_objects.copy()
    result.update(getattr(_set_objects, "variables", {}))
    return result


[docs]@contextmanager
def set_objects(objects):
    """Set the objects made visible to early-bound attributes.

    For example, `objects` might be used to set a 'build_variant_index' var, so
    that an early-bound 'private_build_requires' can change depending on the
    currently-building variant.

    Args:
        objects (dict): Variables to set.
    """
    _set_objects.variables = objects
    try:
        yield
    finally:
        _set_objects.variables = {}


[docs]def load_py(stream, filepath=None):
    """Load python-formatted data from a stream.

    Args:
        stream (file-like object).

    Returns:
        dict.
    """
    with add_sys_paths(config.package_definition_build_python_paths):
        return _load_py(stream, filepath=filepath)


def _load_py(stream, filepath=None):
    scopes = ScopeContext()

    g = dict(scope=scopes,
             early=early,
             late=late,
             include=include,
             ModifyList=ModifyList,
             InvalidPackageError=InvalidPackageError)

    try:
        with open(filepath, "rb") as f:
            exec(compile(f.read(), filepath, 'exec'), g)
    except Exception as e:
        import traceback
        frames = traceback.extract_tb(sys.exc_info()[2])
        while filepath and frames and frames[0][0] != filepath:
            frames = frames[1:]

        msg = "Problem loading %s: %s" % (filepath, str(e))
        stack = ''.join(traceback.format_list(frames)).strip()
        if stack:
            msg += ":\n" + stack
        raise ResourceError(msg)

    result = {}
    excludes = set(('scope', 'InvalidPackageError', '__builtins__',
                    'early', 'late', 'include', 'ModifyList'))

    for k, v in g.items():
        if k not in excludes and \
                (k not in __builtins__ or __builtins__[k] != v):
            result[k] = v

    result.update(scopes.to_dict())
    result = process_python_objects(result, filepath=filepath)
    return result


[docs]class EarlyThis(object):
    """The 'this' object for @early bound functions.

    Just exposes raw package data as object attributes.
    """
    def __init__(self, data):
        self._data = data

    def __getattr__(self, attr):
        missing = object()
        value = self._data.get(attr, missing)
        if value is missing:
            raise AttributeError("No such package attribute '%s'" % attr)

        if isfunction(value) and (hasattr(value, "_early") or hasattr(value, "_late")):
            raise ValueError(
                "An early binding function cannot refer to another early or "
                "late binding function: '%s'" % attr)

        return value


[docs]def process_python_objects(data, filepath=None):
    """Replace certain values in the given package data dict.

    Does things like:
    * evaluates @early decorated functions, and replaces with return value;
    * converts functions into `SourceCode` instances so they can be serialized
      out to installed packages, and evaluated later;
    * strips some values (modules, __-leading variables) that are never to be
      part of installed packages.

    Returns:
        dict: Updated dict.
    """
    def _process(value):
        if isinstance(value, dict):
            for k, v in value.items():
                value[k] = _process(v)

            return value
        elif isfunction(value):
            func = value

            if hasattr(func, "_early"):
                # run the function now, and replace with return value
                #

                # make a copy of the func with its own globals, and add 'this'
                import types
                fn = types.FunctionType(func.__code__,
                                        func.__globals__.copy(),
                                        name=func.__name__,
                                        argdefs=func.__defaults__,
                                        closure=func.__closure__)

                # apply globals
                fn.__globals__["this"] = EarlyThis(data)
                fn.__globals__.update(get_objects())

                # execute the function
                args = py23.get_function_arg_names(func)

                if len(args) not in (0, 1):
                    raise ResourceError("@early decorated function must "
                                        "take zero or one args only")
                if args:
                    # this 'data' arg support isn't needed anymore, but I'm
                    # supporting it til I know nobody is using it...
                    #
                    value_ = fn(data)
                else:
                    value_ = fn()

                # process again in case this is a function returning a function
                return _process(value_)

            elif hasattr(func, "_late"):
                return SourceCode(func=func, filepath=filepath,
                                  eval_as_function=True)

            elif func.__name__ in package_rex_keys:
                # if a rex function, the code has to be eval'd NOT as a function,
                # otherwise the globals dict doesn't get updated with any vars
                # defined in the code, and that means rex code like this:
                #
                # rr = 'test'
                # env.RR = '{rr}'
                #
                # ..won't work. It was never intentional that the above work, but
                # it does, so now we have to keep it so.
                #
                return SourceCode(func=func, filepath=filepath,
                                  eval_as_function=False)

            else:
                # a normal function. Leave unchanged, it will be stripped after
                return func
        else:
            return value

    def _trim(value):
        if isinstance(value, dict):
            for k, v in list(value.items()):
                if isfunction(v):
                    if v.__name__ == "preprocess":
                        # preprocess is a special case. It has to stay intact
                        # until the `DeveloperPackage` has a chance to apply it;
                        # after which it gets removed from the package attributes.
                        #
                        pass
                    else:
                        del value[k]
                elif ismodule(v) or k.startswith("__"):
                    del value[k]
                else:
                    value[k] = _trim(v)

        return value

    data = _process(data)
    data = _trim(data)
    return data


[docs]def load_yaml(stream, **kwargs):
    """Load yaml-formatted data from a stream.

    Args:
        stream (file-like object).

    Returns:
        dict.
    """
    # if there's an error parsing the yaml, and you pass yaml.load a string,
    # it will print lines of context, but will print "<string>" instead of a
    # filename; if you pass a stream, it will print the filename, but no lines
    # of context.
    # Get the best of both worlds, by passing it a string, then replacing
    # "<string>" with the filename if there's an error...
    content = stream.read()
    try:
        return yaml.load(content, Loader=yaml.FullLoader) or {}
    except Exception as e:
        if stream.name and stream.name != '<string>':
            for mark_name in 'context_mark', 'problem_mark':
                mark = getattr(e, mark_name, None)
                if mark is None:
                    continue
                if getattr(mark, 'name') == '<string>':
                    mark.name = stream.name
        raise e


[docs]def load_txt(stream, **kwargs):
    """Load text data from a stream.

    Args:
        stream (file-like object).

    Returns:
        string.
    """
    content = stream.read()
    return content


[docs]def clear_file_caches():
    """Clear any cached files."""
    _load_from_file.forget()


load_functions = {FileFormat.py:      load_py,
                  FileFormat.yaml:    load_yaml,
                  FileFormat.txt:     load_txt}