# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Rez Project
"""
Read and write data from file. File caching via a memcached server is supported.
"""
from contextlib import contextmanager
from inspect import isfunction, ismodule
import sys
import stat
import os
import os.path
import threading
from rez.package_resources import package_rex_keys
from rez.utils.scope import ScopeContext
from rez.utils.sourcecode import SourceCode, early, late, include
from rez.utils.filesystem import TempDirs
from rez.utils.data_utils import ModifyList
from rez.exceptions import ResourceError, InvalidPackageError
from rez.utils.memcached import memcached
from rez.utils.execution import add_sys_paths
from rez.utils import py23
from rez.config import config
from rez.vendor.atomicwrites import atomic_write
from rez.vendor.enum import Enum
from rez.vendor.six.six.moves import StringIO
from rez.vendor.six.six import PY3
from rez.vendor import yaml
# Creates the temp dirs that hold the local copies of files written via
# `open_file_for_write`.
tmpdir_manager = TempDirs(config.tmpdir, prefix="rez_write_")
# Debug printer obtained from the config for the "file_loads" category.
debug_print = config.debug_printer("file_loads")
# Maps the real path of every file written via `open_file_for_write` to the
# path of its local copy; `load_from_file` consults this before reading.
file_cache = {}
@contextmanager
def open_file_for_write(filepath, mode=None):
    """Writes both to given filepath, and tmpdir location.

    This is to get around the problem with some NFS's where immediately reading
    a file that has just been written is problematic. Instead, any files that we
    write, we also write to /tmp, and reads of these files are redirected there.

    The caller writes into an in-memory buffer; nothing touches the disk until
    the `with` body has finished without raising.

    Args:
        filepath (str): File to write.
        mode (int): Same mode arg as you would pass to `os.chmod`.

    Yields:
        File-like object.
    """
    stream = StringIO()
    yield stream
    content = stream.getvalue()

    filepath = os.path.realpath(filepath)
    tmpdir = tmpdir_manager.mkdtemp()
    cache_filepath = os.path.join(tmpdir, os.path.basename(filepath))
    encoding = {"encoding": "utf-8"} if PY3 else {}

    debug_print("Writing to %s (local cache of %s)", cache_filepath, filepath)

    # Attempt to make file writable if it isn't already. Just fallthrough
    # if this fails, we'll get the error we expect on write anyway.
    # Only filesystem errors are ignored, so that KeyboardInterrupt and
    # SystemExit still propagate.
    #
    try:
        if os.path.exists(filepath):
            orig_mode = os.stat(filepath).st_mode
            os.chmod(filepath, orig_mode | stat.S_IWUSR)
    except OSError:
        pass

    # try atomic write, but that can sometimes fail.
    # https://github.com/nerdvegas/rez/issues/858
    #
    written = False
    try:
        with atomic_write(filepath, overwrite=True, **encoding) as f:
            f.write(content)
        written = True
    except Exception as e:
        # best-effort: any ordinary failure falls through to the plain write
        debug_print("Atomic write to %s failed (%s), falling back to "
                    "standard write", filepath, e)

    # fallback to standard write
    if not written:
        with open(filepath, 'w', **encoding) as f:
            f.write(content)

    if mode is not None:
        os.chmod(filepath, mode)

    # write the local fs cache copy
    with open(cache_filepath, 'w', **encoding) as f:
        f.write(content)

    file_cache[filepath] = cache_filepath
def load_from_file(filepath, format_=FileFormat.py, update_data_callback=None,
                   disable_memcache=False):
    """Load data from a file.

    Note:
        Any functions from a .py file will be converted to `SourceCode` objects.

    Args:
        filepath (str): File to load.
        format_ (`FileFormat`): Format of file contents.
        update_data_callback (callable): Used to change data before it is
            returned or cached.
        disable_memcache (bool): If True, don't r/w to memcache.

    Returns:
        dict.
    """
    filepath = os.path.realpath(filepath)
    local_copy = file_cache.get(filepath)

    if local_copy:
        # This process wrote the file itself: read the local copy instead, to
        # avoid potential write-then-read issues over NFS.
        return _load_file(filepath=local_copy,
                          format_=format_,
                          update_data_callback=update_data_callback,
                          original_filepath=filepath)

    # Either read straight from disk, or go through the memcached-backed loader.
    loader = _load_file if disable_memcache else _load_from_file
    return loader(filepath=filepath,
                  format_=format_,
                  update_data_callback=update_data_callback)
def _load_from_file__key(filepath, format_, update_data_callback):
    """Build the cache key used by the memcached-backed `_load_from_file`.

    The key combines the file path, the format, the callback's `__name__` and
    the file's inode and mtime, so it changes whenever the file on disk does.

    Args:
        filepath (str): File being loaded; it is stat'ed, so it must exist.
        format_: Format of the file contents; stringified into the key.
        update_data_callback (callable or None): Callback applied to the
            loaded data. Only its `__name__` takes part in the key.

    Returns:
        str: The string form of the key tuple.
    """
    file_stat = os.stat(filepath)

    # No callback, and callables without a __name__, both map to 'None'.
    callback_key = 'None'
    if update_data_callback is not None:
        callback_key = getattr(update_data_callback, "__name__", "None")

    key_parts = ("package_file", filepath, str(format_), callback_key,
                 int(file_stat.st_ino), file_stat.st_mtime)
    return str(key_parts)
@memcached(servers=config.memcached_uri if config.cache_package_files else None,
           min_compress_len=config.memcached_package_file_min_compress_len,
           key=_load_from_file__key,
           debug=config.debug_memcache)
def _load_from_file(filepath, format_, update_data_callback):
    """Memcached-decorated wrapper around `_load_file`.

    The cache key comes from `_load_from_file__key`. No memcached servers are
    passed to the decorator when `config.cache_package_files` is off.
    """
    return _load_file(filepath=filepath,
                      format_=format_,
                      update_data_callback=update_data_callback)
def _load_file(filepath, format_, update_data_callback, original_filepath=None):
    """Read a file from disk and parse it with the loader for `format_`.

    Args:
        filepath (str): File to actually read.
        format_ (`FileFormat`): Format of file contents; selects the loader
            from `load_functions`.
        update_data_callback (callable): If set, called as
            `update_data_callback(format_, result)`; its return value replaces
            the loaded data.
        original_filepath (str): Set when `filepath` is a local copy of another
            file; only used for the debug message.

    Returns:
        The loaded data, after the callback (if any) has been applied.
    """
    load_func = load_functions[format_]

    if debug_print:
        if original_filepath:
            debug_print("Loading file: %s (local cache of %s)",
                        filepath, original_filepath)
        else:
            debug_print("Loading file: %s", filepath)

    # Files are written as utf-8 by `open_file_for_write` on python 3, so read
    # them back the same way rather than relying on the locale's default
    # encoding. Python 2's builtin open() has no `encoding` argument.
    encoding = {"encoding": "utf-8"} if PY3 else {}

    with open(filepath, **encoding) as f:
        result = load_func(f, filepath=filepath)

    if update_data_callback:
        result = update_data_callback(format_, result)
    return result
# Per-thread storage for the variables currently bound by `set_objects`.
_set_objects = threading.local()


# Default variables to avoid not-defined errors in early-bound attribs
default_objects = {
    "building": False,
    "build_variant_index": 0,
    "build_variant_requires": []
}


def get_objects():
    """Get currently bound variables for evaluation of early-bound attribs.

    The defaults in `default_objects` are overlaid with whatever the current
    thread has bound via `set_objects`.

    Returns:
        dict: A new dict on every call (shallow copy of the defaults).
    """
    result = default_objects.copy()
    result.update(getattr(_set_objects, "variables", {}))
    return result


@contextmanager
def set_objects(objects):
    """Set the objects made visible to early-bound attributes.

    For example, `objects` might be used to set a 'build_variant_index' var, so
    that an early-bound 'private_build_requires' can change depending on the
    currently-building variant.

    The bindings are per-thread and replace (not merge with) any bindings
    already in effect. On exit the bindings that were in effect on entry are
    restored, so nested uses do not wipe the outer context's variables.

    Args:
        objects (dict): Variables to set.
    """
    previous = getattr(_set_objects, "variables", {})
    _set_objects.variables = objects
    try:
        yield
    finally:
        _set_objects.variables = previous
def load_py(stream, filepath=None):
    """Load python-formatted data from a stream.

    The load runs with `config.package_definition_build_python_paths` added
    via `add_sys_paths`.

    Args:
        stream (file-like object).
        filepath (str): Path of the file being loaded; forwarded to `_load_py`.

    Returns:
        dict.
    """
    extra_paths = config.package_definition_build_python_paths
    with add_sys_paths(extra_paths):
        loaded = _load_py(stream, filepath=filepath)
    return loaded
def _load_py(stream, filepath=None):
    """Execute a python package definition file and collect what it defines.

    Note:
        `stream` is not read here; the source is re-read from `filepath` in
        binary mode, compiled and exec'd.

    Args:
        stream (file-like object): Unused.
        filepath (str): File to execute.

    Returns:
        dict: Names defined by the file, merged with the contents of its
        `scope` contexts, after `process_python_objects` has been applied.

    Raises:
        ResourceError: If reading, compiling or executing the file fails. The
            message carries the part of the traceback that starts inside the
            file itself.
    """
    scopes = ScopeContext()

    # names made available to the executed file
    namespace = {
        "scope": scopes,
        "early": early,
        "late": late,
        "include": include,
        "ModifyList": ModifyList,
        "InvalidPackageError": InvalidPackageError,
    }

    try:
        with open(filepath, "rb") as f:
            exec(compile(f.read(), filepath, 'exec'), namespace)
    except Exception as e:
        import traceback

        frames = traceback.extract_tb(sys.exc_info()[2])
        if filepath:
            # drop leading frames until the first one located in the file
            # itself; if there is none, nothing is left
            start = len(frames)
            for index, frame in enumerate(frames):
                if frame[0] == filepath:
                    start = index
                    break
            frames = frames[start:]

        msg = "Problem loading %s: %s" % (filepath, str(e))
        stack = ''.join(traceback.format_list(frames)).strip()
        if stack:
            msg += ":\n" + stack
        raise ResourceError(msg)

    # Keep what the file defined: skip the injected helpers, and skip any
    # name that still holds the same value as the builtin of that name.
    # NOTE(review): this indexes `__builtins__` like a mapping - confirm that
    # holds wherever this module is run.
    excludes = set(('scope', 'InvalidPackageError', '__builtins__',
                    'early', 'late', 'include', 'ModifyList'))
    result = dict(
        (name, obj) for name, obj in namespace.items()
        if name not in excludes
        and (name not in __builtins__ or __builtins__[name] != obj)
    )

    result.update(scopes.to_dict())
    return process_python_objects(result, filepath=filepath)
class EarlyThis(object):
    """The 'this' object for @early bound functions.

    A thin read-only view that exposes the raw package data dict as object
    attributes.
    """

    def __init__(self, data):
        # raw package data; looked up by key on attribute access
        self._data = data

    def __getattr__(self, attr):
        """Return the package attribute `attr`.

        Raises:
            AttributeError: If the package data has no such key.
            ValueError: If the value is a function marked as early- or
                late-bound.
        """
        # unique sentinel, so that a stored None still counts as present
        sentinel = object()
        found = self._data.get(attr, sentinel)
        if found is sentinel:
            raise AttributeError("No such package attribute '%s'" % attr)

        if not isfunction(found):
            return found

        is_bound_function = hasattr(found, "_early") or hasattr(found, "_late")
        if is_bound_function:
            raise ValueError(
                "An early binding function cannot refer to another early or "
                "late binding function: '%s'" % attr)

        return found
def process_python_objects(data, filepath=None):
    """Replace certain values in the given package data dict.

    Does things like:
    * evaluates @early decorated functions, and replaces with return value;
    * converts functions into `SourceCode` instances so they can be serialized
      out to installed packages, and evaluated later;
    * strips some values (modules, __-leading variables) that are never to be
      part of installed packages.

    Nested dicts are processed recursively, and `data` is modified in place.

    Args:
        data (dict): Package data to process.
        filepath (str): Source file of the data; passed on to `SourceCode`.

    Returns:
        dict: Updated dict.
    """
    def _evaluate_early(func):
        # Run an @early function now and hand back whatever it returns.
        import types

        # make a copy of the func with its own globals, so that adding 'this'
        # and the bound objects doesn't touch the original function's globals
        bound = types.FunctionType(func.__code__,
                                   func.__globals__.copy(),
                                   name=func.__name__,
                                   argdefs=func.__defaults__,
                                   closure=func.__closure__)

        bound.__globals__["this"] = EarlyThis(data)
        bound.__globals__.update(get_objects())

        arg_names = py23.get_function_arg_names(func)
        if len(arg_names) > 1:
            raise ResourceError("@early decorated function must "
                                "take zero or one args only")

        # the single 'data' arg form isn't needed anymore, but is still
        # supported in case somebody relies on it
        return bound(data) if arg_names else bound()

    def _process(value):
        if isinstance(value, dict):
            for key, item in value.items():
                value[key] = _process(item)
            return value

        if not isfunction(value):
            return value

        if hasattr(value, "_early"):
            # process again in case this is a function returning a function
            return _process(_evaluate_early(value))

        if hasattr(value, "_late"):
            return SourceCode(func=value, filepath=filepath,
                              eval_as_function=True)

        if value.__name__ in package_rex_keys:
            # A rex function's code has to be eval'd NOT as a function,
            # otherwise the globals dict doesn't get updated with any vars
            # defined in the code, and rex code like this:
            #
            # rr = 'test'
            # env.RR = '{rr}'
            #
            # ..won't work. That was never intentional, but it does work, so
            # it has to be kept that way.
            #
            return SourceCode(func=value, filepath=filepath,
                              eval_as_function=False)

        # a normal function. Leave unchanged, it gets stripped by _trim
        return value

    def _trim(value):
        if not isinstance(value, dict):
            return value

        # iterate over a snapshot, since entries are deleted along the way
        for key, item in list(value.items()):
            if isfunction(item):
                # 'preprocess' is a special case. It has to stay intact until
                # the `DeveloperPackage` has had a chance to apply it; after
                # which it gets removed from the package attributes.
                if item.__name__ != "preprocess":
                    del value[key]
            elif ismodule(item) or key.startswith("__"):
                del value[key]
            else:
                value[key] = _trim(item)

        return value

    return _trim(_process(data))
def load_yaml(stream, **kwargs):
    """Load yaml-formatted data from a stream.

    Args:
        stream (file-like object): Stream to read. It may or may not have a
            `name` attribute (e.g. in-memory streams usually don't).
        **kwargs: Accepted so that all loaders can be called the same way;
            not used.

    Returns:
        dict: The parsed content, or an empty dict if the document is empty.

    Raises:
        Exception: Whatever the yaml parser raised, re-raised as-is. Any error
            marks naming "<string>" are first pointed at the stream's name,
            when it has one.
    """
    # if there's an error parsing the yaml, and you pass yaml.load a string,
    # it will print lines of context, but will print "<string>" instead of a
    # filename; if you pass a stream, it will print the filename, but no lines
    # of context.
    # Get the best of both worlds, by passing it a string, then replacing
    # "<string>" with the filename if there's an error...
    content = stream.read()
    try:
        # NOTE(review): FullLoader is not meant for untrusted input; confirm
        # that only trusted package files are ever loaded through here.
        return yaml.load(content, Loader=yaml.FullLoader) or {}
    except Exception as e:
        # Not every stream has a name; a missing one must not mask the
        # original parse error with an AttributeError.
        stream_name = getattr(stream, "name", None)
        if stream_name and stream_name != '<string>':
            for mark_name in ('context_mark', 'problem_mark'):
                mark = getattr(e, mark_name, None)
                if mark is None:
                    continue
                if getattr(mark, 'name', None) == '<string>':
                    mark.name = stream_name
        raise
def load_txt(stream, **kwargs):
    """Load text data from a stream.

    Args:
        stream (file-like object): Stream to read in full.
        **kwargs: Accepted so that all loaders can be called the same way;
            not used.

    Returns:
        string.
    """
    return stream.read()
def clear_file_caches():
    """Clear any cached files.

    This calls `forget()` on the memcached-decorated `_load_from_file`; the
    `file_cache` mapping of written files is left untouched.
    """
    cached_loader = _load_from_file
    cached_loader.forget()
# Dispatch table used by `_load_file`: maps each `FileFormat` member to the
# function that parses a stream of that format. Every loader is called as
# `load_func(stream, filepath=...)`.
load_functions = {FileFormat.py: load_py,
FileFormat.yaml: load_yaml,
FileFormat.txt: load_txt}