|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +# |
| 3 | +# Copyright (c) 2021 VMware, Inc. All Rights Reserved. |
| 4 | +# SPDX-License-Identifier: BSD-2-Clause |
| 5 | + |
| 6 | +""" |
| 7 | +General helpers for SPDX document generator |
| 8 | +""" |
| 9 | +import datetime |
| 10 | +import hashlib |
| 11 | +import io |
| 12 | +import re |
| 13 | +import uuid |
| 14 | +from datetime import datetime |
| 15 | +from typing import Union, Callable, IO, Tuple |
| 16 | + |
| 17 | +from license_expression import get_spdx_licensing, LicenseExpression, Licensing |
| 18 | +from spdx_tools.spdx.model import SpdxNone, Document |
| 19 | + |
| 20 | +from tern.classes.file_data import FileData |
| 21 | +from tern.classes.image import Image |
| 22 | +from tern.classes.image_layer import ImageLayer |
| 23 | +from tern.classes.package import Package |
| 24 | + |
| 25 | + |
def get_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a string."""
    random_uuid = uuid.uuid4()
    return str(random_uuid)
| 28 | + |
| 29 | + |
def get_current_timestamp() -> datetime:
    """Return the current UTC time, truncated to whole seconds.

    The result is a naive ``datetime`` (``tzinfo`` is ``None``) holding the
    UTC wall-clock time, i.e. the same kind of value ``datetime.utcnow()``
    produced, so existing callers are unaffected.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp instead and drop the tzinfo to keep the naive return value.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None, microsecond=0)
| 32 | + |
| 33 | + |
def get_string_id(string: str) -> str:
    """Return a short identifier derived from the given string.

    The identifier is the last seven hexadecimal characters of the SHA-256
    digest of the UTF-8 encoded input.
    """
    digest = hashlib.sha256()
    digest.update(string.encode('utf-8'))
    return digest.hexdigest()[-7:]
| 37 | + |
| 38 | + |
def get_license_ref(license_string: str) -> str:
    """Return the SPDX ``LicenseRef-<id>`` string for the given license.

    The ``<id>`` part is produced by get_string_id() from the str() form of
    the input.
    """
    license_id = get_string_id(str(license_string))
    return f'LicenseRef-{license_id}'
| 42 | + |
| 43 | + |
def replace_invalid_chars_in_license_expression(license_string: str) -> str:
    """Replace common characters that are invalid in SPDX license expressions.

    Substitutions applied: ',' -> ' and', '/' -> '-', ';' -> '.' and
    '&' -> 'and'. A string containing none of these characters is returned
    unchanged.
    """
    substitutions = {',': ' and', '/': '-', ';': '.', '&': 'and'}
    if not any(invalid_char in license_string for invalid_char in substitutions):
        return license_string
    # None of the replacement texts contains a character that is itself
    # replaced, so the substitutions do not interfere with each other.
    for invalid_char, replacement in substitutions.items():
        license_string = license_string.replace(invalid_char, replacement)
    return license_string
| 54 | + |
| 55 | + |
def is_valid_license_expression(license_string: str) -> bool:
    """Return True if the string validates as an SPDX license expression.

    The string is checked with the licensing object returned by
    get_spdx_licensing(); it is valid when validation reports no errors.
    An AttributeError raised during validation is treated as "invalid".
    """
    spdx_licensing = get_spdx_licensing()
    try:
        validation_errors = spdx_licensing.validate(license_string).errors
    except AttributeError:
        # Catch any invalid license chars here
        return False
    return validation_errors == []
| 63 | + |
| 64 | + |
def get_package_license_declared(package_license_declared: str) -> Union[LicenseExpression, SpdxNone]:
    """Convert a declared license string into an SPDX license value.

    Common invalid SPDX license characters are first substituted using
    replace_invalid_chars_in_license_expression(). If the substituted string
    is a valid SPDX license expression, the parsed expression is returned.
    If not, the LicenseRef of the original declared license string (as
    passed in, before any substitution) is parsed and returned. If a blank
    string is passed in, SpdxNone() is returned.
    """
    if not package_license_declared:
        return SpdxNone()

    # Keep the caller's string untouched: the LicenseRef fallback has to be
    # derived from the original text, not from the substituted one.
    substituted_license = replace_invalid_chars_in_license_expression(package_license_declared)
    if is_valid_license_expression(substituted_license):
        return Licensing().parse(substituted_license)

    return Licensing().parse(get_license_ref(package_license_declared))
| 79 | + |
| 80 | + |
def get_serialized_document_string(spdx_document: Document, writer_function: Callable[..., object]) -> str:
    """Serialize an SPDX document to a string using the given writer.

    ``writer_function`` is called as
    ``writer_function(spdx_document, stream, validate=False)`` with an
    in-memory text stream, so it must accept a ``validate`` keyword argument.
    Whatever it writes to the stream is returned; its own return value is
    ignored.
    """
    with io.StringIO() as stream:
        writer_function(spdx_document, stream, validate=False)
        return stream.getvalue()
| 85 | + |
| 86 | + |
| 87 | +########################################################################################### |
| 88 | +# central place for SPDXRef-generators to avoid circular imports as these are widely used # |
| 89 | +########################################################################################### |
| 90 | + |
def get_image_spdxref(image_obj: Image) -> str:
    """Return the SPDX reference ID for the given image object.

    The ID is 'SPDXRef-' followed by the value returned by the image's
    get_human_readable_id() method.
    """
    readable_id = image_obj.get_human_readable_id()
    return f'SPDXRef-{readable_id}'
| 95 | + |
| 96 | + |
def get_package_spdxref(package_obj: Package) -> Tuple[str, str]:
    """Return the SPDX reference IDs for a package and its source package.

    Returns a tuple ``(binary_ref, source_ref)``. ``binary_ref`` is built
    from the package name and version. ``source_ref`` is built from the
    source name and source version plus a "-src" suffix, to differentiate
    between binary and source package refs; it is an empty string when the
    package has no source name.
    """
    def _spdx_ref(raw_id: str) -> str:
        # Allowed characters in an SPDX ID are letters, numbers, "." and
        # "-"; every other character is replaced with "-". The same rule is
        # applied to both refs so neither can end up with a stray "_".
        return 'SPDXRef-' + re.sub(r'[^A-Za-z0-9.-]', '-', raw_id)

    pkg_ref = _spdx_ref(f"{package_obj.name}-{package_obj.version}")
    if not package_obj.src_name:
        return pkg_ref, ''
    src_ref = _spdx_ref(f"{package_obj.src_name}-{package_obj.src_version}-src")
    return pkg_ref, src_ref
| 113 | + |
| 114 | + |
def get_layer_spdxref(layer_obj: ImageLayer) -> str:
    """Return the SPDX reference ID for the given layer object.

    The ID is 'SPDXRef-' followed by the first ten characters of the
    layer's diff_id.
    """
    short_diff_id = layer_obj.diff_id[:10]
    return f'SPDXRef-{short_diff_id}'
| 119 | + |
| 120 | + |
def get_file_spdxref(filedata: FileData, layer_id: str) -> str:
    """Return an SPDX reference ID of the form SPDXRef-<id> for a file.

    <id> is the get_string_id() hash of the file path, the first seven
    characters of the file checksum and the layer_id, concatenated in that
    order.
    """
    path = filedata.path
    short_checksum = filedata.checksum[:7]
    file_id = get_string_id(path + short_checksum + layer_id)
    return f'SPDXRef-{file_id}'
0 commit comments