From 5cd799cc5d04527ac782270008ff647b3779ff05 Mon Sep 17 00:00:00 2001 From: Phil Sphicas Date: Thu, 13 Jan 2022 13:24:27 -0800 Subject: [PATCH] Allow source substring extraction When performing substitutions, there are occasions when the source value does not exactly match the format required by the destination document (e.g. the values.yaml structure of an Armada chart). This change provides the ability to extract a substring of the source value, and substitute that into the destination document. Two optional fields are added to `src` under `metadata.substitutions`: * `pattern`: a regular expression, with optional capture groups * `match_group`: the number of the desired capture group The canonical use case is a chart that requires an image with the repo name and tag in separate fields, while the substitution source has the full image path as a single value. For example, assuming that the source document "software-versions" has: data: images: hello: docker.io/library/hello-world:latest Then the following set of substitutions would put the repo and tag in the applicable values in the destination document: metadata: substitutions: - src: schema: pegleg/SoftwareVersions/v1 name: software-versions path: .images.hello pattern: '^(.*):(.*)' match_group: 1 dest: path: .values.images.hello.repo - src: schema: pegleg/SoftwareVersions/v1 name: software-versions path: .images.hello pattern: '^(.*):(.*)' match_group: 2 dest: path: .values.images.hello.tag data: values: images: hello: repo: # docker.io/library/hello-world tag: # latest Change-Id: I2fcb0d2b8e2fe3d85479ac2bad0b7b90f434eb77 --- deckhand/common/utils.py | 22 +++++- .../engine/schemas/metadata_document.yaml | 4 + deckhand/engine/secrets_manager.py | 10 ++- deckhand/tests/unit/common/test_utils.py | 58 ++++++++++++++ .../tests/unit/engine/test_secrets_manager.py | 61 +++++++++++++++ doc/source/users/substitution.rst | 78 +++++++++++++++++++ 6 files changed, 230 insertions(+), 3 deletions(-) diff --git 
a/deckhand/common/utils.py b/deckhand/common/utils.py index 362b99f6..2516e6ed 100644 --- a/deckhand/common/utils.py +++ b/deckhand/common/utils.py @@ -210,7 +210,8 @@ def _execute_data_expansion(data, jsonpath): d = d.get(path) -def jsonpath_replace(data, value, jsonpath, pattern=None, recurse=None): +def jsonpath_replace(data, value, jsonpath, pattern=None, recurse=None, + src_pattern=None, src_match_group=0): """Update value in ``data`` at the path specified by ``jsonpath``. If the nested path corresponding to ``jsonpath`` isn't found in ``data``, @@ -246,6 +247,13 @@ def jsonpath_replace(data, value, jsonpath, pattern=None, recurse=None): a JSON path that lives closer to the nested strings in question. Optimize performance by choosing an ideal ``depth`` value; -1 will cause recursion depth to be infinite. + :param src_pattern: An optional regular expression pattern to apply to the + source ``value``. The pattern is applied using re.search(), and may + include parenthesized subgroups. Only the matched portion of ``value`` + is considered when substituting into the destination document. + :param src_match_group: The numbered subgroup of the ``src_pattern`` match + to use as the substitution source, where 0 (the default) represents the + entire match, 1 is the first parenthesized subgroup, etc. :returns: Updated value at ``data[jsonpath]``. :raises: MissingDocumentPattern if ``pattern`` is not None and ``data[jsonpath]`` doesn't exist. @@ -258,6 +266,18 @@ def jsonpath_replace(data, value, jsonpath, pattern=None, recurse=None): data_copy = copy.copy(data) value_copy = copy.copy(value) + # If a src_pattern is specified, attempt a regex match. 
+ if src_pattern: + if not isinstance(value_copy, six.string_types): + err = 'not a string: {}' % value_copy + LOG.error(err) + raise ValueError(err) + result = re.search(src_pattern, value_copy) + if not result: + LOG.warn("no match found, using entire value") + else: + value_copy = result.group(src_match_group) + jsonpath = _normalize_jsonpath(jsonpath) recurse = recurse or {} diff --git a/deckhand/engine/schemas/metadata_document.yaml b/deckhand/engine/schemas/metadata_document.yaml index dbd24631..c4b8db27 100644 --- a/deckhand/engine/schemas/metadata_document.yaml +++ b/deckhand/engine/schemas/metadata_document.yaml @@ -107,6 +107,10 @@ data: type: string path: type: string + pattern: + type: string + match_group: + type: integer additionalProperties: false required: - schema diff --git a/deckhand/engine/secrets_manager.py b/deckhand/engine/secrets_manager.py index 6ee035ee..584837c3 100644 --- a/deckhand/engine/secrets_manager.py +++ b/deckhand/engine/secrets_manager.py @@ -242,13 +242,15 @@ class SecretsSubstitution(object): dest_doc.name) def _substitute_one(self, document, src_doc, src_secret, dest_path, - dest_pattern, dest_recurse=None): + dest_pattern, dest_recurse=None, + src_pattern=None, src_match_group=0): dest_recurse = dest_recurse or {} exc_message = '' try: substituted_data = utils.jsonpath_replace( document.data, src_secret, dest_path, - pattern=dest_pattern, recurse=dest_recurse) + pattern=dest_pattern, recurse=dest_recurse, + src_pattern=src_pattern, src_match_group=src_match_group) if (isinstance(document.data, dict) and isinstance(substituted_data, dict)): document.data.update(substituted_data) @@ -320,6 +322,8 @@ class SecretsSubstitution(object): src_schema = sub['src']['schema'] src_name = sub['src']['name'] src_path = sub['src']['path'] + src_pattern = sub['src'].get('pattern', None) + src_match_group = sub['src'].get('match_group', 0) if (src_schema, src_name) in self._substitution_sources: src_doc = self._substitution_sources[ @@ 
-391,6 +395,8 @@ class SecretsSubstitution(object): document, src_doc=src_doc, src_secret=src_secret, + src_pattern=src_pattern, + src_match_group=src_match_group, dest_path=dest_path, dest_pattern=dest_pattern, dest_recurse=dest_recurse) diff --git a/deckhand/tests/unit/common/test_utils.py b/deckhand/tests/unit/common/test_utils.py index 26d5987f..238745ac 100644 --- a/deckhand/tests/unit/common/test_utils.py +++ b/deckhand/tests/unit/common/test_utils.py @@ -189,6 +189,64 @@ class TestJSONPathReplace(test_base.DeckhandTestCase): recurse={'depth': 3}) self.assertEqual(expected, result) + def test_jsonpath_replace_with_src_pattern(self): + src = 'repo.example.com/image@sha256:e3b0c44298fc...' + src_pattern = 'sha256.*' + path = ".values.image.sha" + body = {"values": {}} + expected = {"values": {"image": { + "sha": "sha256:e3b0c44298fc..."}}} + result = utils.jsonpath_replace(body, src, jsonpath=path, + src_pattern=src_pattern) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_src_pattern_implicit_match_group_0(self): + src = 'repo.example.com/image:v1.2.3' + src_pattern = '^(.*):(.*)' + path = ".values.image" + body = {"values": {}} + expected = {"values": {"image": "repo.example.com/image:v1.2.3"}} + result = utils.jsonpath_replace(body, src, jsonpath=path, + src_pattern=src_pattern) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_src_pattern_match_group_0(self): + src = 'repo.example.com/image:v1.2.3' + src_pattern = '^(.*):(.*)' + src_match_group = 0 + path = ".values.image" + body = {"values": {}} + expected = {"values": {"image": "repo.example.com/image:v1.2.3"}} + result = utils.jsonpath_replace(body, src, jsonpath=path, + src_pattern=src_pattern, + src_match_group=src_match_group) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_src_pattern_match_group_1(self): + src = 'repo.example.com/image:v1.2.3' + src_pattern = '^(.*):(.*)' + src_match_group = 1 + path = ".values.image.repository" + 
body = {"values": {}} + expected = {"values": { + "image": {"repository": "repo.example.com/image"}}} + result = utils.jsonpath_replace(body, src, jsonpath=path, + src_pattern=src_pattern, + src_match_group=src_match_group) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_src_pattern_match_group_2(self): + src = 'repo.example.com/image:v1.2.3' + src_pattern = '^(.*):(.*)' + src_match_group = 2 + path = ".values.image.tag" + body = {"values": {}} + expected = {"values": {"image": {"tag": "v1.2.3"}}} + result = utils.jsonpath_replace(body, src, jsonpath=path, + src_pattern=src_pattern, + src_match_group=src_match_group) + self.assertEqual(expected, result) + class TestJSONPathReplaceNegative(test_base.DeckhandTestCase): """Validate JSONPath replace negative scenarios.""" diff --git a/deckhand/tests/unit/engine/test_secrets_manager.py b/deckhand/tests/unit/engine/test_secrets_manager.py index 6f1e45a3..4a3bdf0e 100644 --- a/deckhand/tests/unit/engine/test_secrets_manager.py +++ b/deckhand/tests/unit/engine/test_secrets_manager.py @@ -874,6 +874,67 @@ data: substituted_docs = list(secret_substitution.substitute_all(documents)) self.assertEqual(expected, substituted_docs[0]) + def test_doc_substitution_src_pattern(self): + image = "docker.io/library/hello-world:latest" + repo, tag = image.split(":") + test_yaml = """ +--- +# Source document. +schema: pegleg/SoftwareVersions/v1 +metadata: + schema: metadata/Document/v1 + name: software-versions + layeringDefinition: + abstract: false + layer: global + storagePolicy: cleartext +data: + images: + hello: %s +--- +# Destination document. 
+schema: armada/Chart/v1 +metadata: + name: example-chart-01 + schema: metadata/Document/v1 + layeringDefinition: + abstract: false + layer: global + substitutions: + - src: + schema: pegleg/SoftwareVersions/v1 + name: software-versions + path: .images.hello + pattern: '^(.*):(.*)' + match_group: 1 + dest: + path: .values.images.hello.repo + - src: + schema: pegleg/SoftwareVersions/v1 + name: software-versions + path: .images.hello + pattern: '^(.*):(.*)' + match_group: 2 + dest: + path: .values.images.hello.tag +data: + values: + images: + hello: + repo: # docker.io/library/hello-world + tag: # latest +""" % image + documents = list(yaml.safe_load_all(test_yaml)) + expected = copy.deepcopy(documents[1]) + expected['data']['values']['images']['hello']['repo'] = repo + expected['data']['values']['images']['hello']['tag'] = tag + + secret_substitution = secrets_manager.SecretsSubstitution( + documents) + substituted_docs = list(secret_substitution.substitute_all( + documents)) + self.assertEqual(expected, substituted_docs[0]) + class TestSecretsSubstitutionNegative(test_base.DeckhandWithDBTestCase): diff --git a/doc/source/users/substitution.rst b/doc/source/users/substitution.rst index 7554c4e2..41061e71 100644 --- a/doc/source/users/substitution.rst +++ b/doc/source/users/substitution.rst @@ -380,6 +380,84 @@ depth. Any other positive integer will specify how many levels deep to recurse in order to optimize recursive pattern replacement. Take care to specify the required recursion depth or else too-deep patterns won't be replaced. +Source Pattern Matching (Substring Extraction) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In some cases, only a substring of the substitution source is needed in the +destination document. For example, the source document may specify a full image +path, while the destination chart requires the repo and tag as separate fields. 
+ +This type of substitution can be accomplished with the optional parameters: +* ``src.pattern`` - a regular expression, with optional capture groups. +* ``src.match_group`` - the number of the desired capture group. + +.. note:: + + It is an error to specify ``src.pattern`` if the substitution source is not a + string (e.g. an object or an array). + +.. note:: + + If the regex does not match, a warning is logged, and the entire source + string is used. + +.. note:: + + The default ``src.match_group`` is 0 (i.e. the entire match). This allows the + use of expressions like ``sha256:.*`` without parentheses, and without + explicitly specifying a match group. + +For example, given the following source documents, the distinct values for +``repo`` and ``tag`` will be extracted from the source image: + +.. code-block:: yaml + + --- + # Source document. + schema: pegleg/SoftwareVersions/v1 + metadata: + schema: metadata/Document/v1 + name: software-versions + layeringDefinition: + abstract: false + layer: global + storagePolicy: cleartext + data: + images: + hello: docker.io/library/hello-world:latest + --- + # Destination document. + schema: armada/Chart/v1 + metadata: + name: example-chart-01 + schema: metadata/Document/v1 + layeringDefinition: + abstract: false + layer: global + substitutions: + - src: + schema: pegleg/SoftwareVersions/v1 + name: software-versions + path: .images.hello + pattern: '^(.*):(.*)' + match_group: 1 + dest: + path: .values.images.hello.repo + - src: + schema: pegleg/SoftwareVersions/v1 + name: software-versions + path: .images.hello + pattern: '^(.*):(.*)' + match_group: 2 + dest: + path: .values.images.hello.tag + data: + values: + images: + hello: + repo: # docker.io/library/hello-world + tag: # latest + Substitution of Encrypted Data ------------------------------