Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(diff): properties from ChangeSet diff were ignored #30093

Merged
merged 15 commits into from
May 10, 2024
Merged
7 changes: 4 additions & 3 deletions packages/@aws-cdk-testing/cli-integ/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@
"license": "Apache-2.0",
"devDependencies": {
"@aws-cdk/cdk-build-tools": "0.0.0",
"@types/semver": "^7.5.8",
"@types/yargs": "^15.0.19",
"@aws-cdk/pkglint": "0.0.0",
"@types/fs-extra": "^9.0.13",
"@types/glob": "^7.2.0",
"@types/npm": "^7.19.3",
"@aws-cdk/pkglint": "0.0.0"
"@types/semver": "^7.5.8",
"@types/yargs": "^15.0.19"
},
"dependencies": {
"@aws-sdk/client-ssm": "^3.569.0",
"@octokit/rest": "^18.12.0",
"aws-sdk": "^2.1610.0",
"axios": "^1.6.8",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,22 @@ class SsoInstanceAccessControlConfig extends Stack {
}
}

class DiffFromChangeSetStack extends Stack {
constructor(scope, id) {
super(scope, id);

const queueNameFromParameter = ssm.StringParameter.valueForStringParameter(this, 'for-queue-name-defined-by-ssm-param');
new sqs.Queue(this, "DiffFromChangeSetQueue", {
queueName: queueNameFromParameter,
})

new ssm.StringParameter(this, 'DiffFromChangeSetSSMParam', {
parameterName: 'DiffFromChangeSetSSMParamName',
stringValue: queueNameFromParameter,
});
}
}

class ListMultipleDependentStack extends Stack {
constructor(scope, id) {
super(scope, id);
Expand Down Expand Up @@ -658,6 +674,8 @@ switch (stackSet) {

const failed = new FailedStack(app, `${stackPrefix}-failed`)

new DiffFromChangeSetStack(app, `${stackPrefix}-queue-name-defined-by-ssm-param`)

// A stack that depends on the failed stack -- used to test that '-e' does not deploy the failing stack
const dependsOnFailed = new OutputsStack(app, `${stackPrefix}-depends-on-failed`);
dependsOnFailed.addDependency(failed);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { promises as fs, existsSync } from 'fs';
import * as os from 'os';
import * as path from 'path';
import * as SSM from '@aws-sdk/client-ssm';
import { integTest, cloneDirectory, shell, withDefaultFixture, retry, sleep, randomInteger, withSamIntegrationFixture, RESOURCES_DIR, withCDKMigrateFixture, withExtendedTimeoutFixture } from '../../lib';

jest.setTimeout(2 * 60 * 60_000); // Includes the time to acquire locks, worst-case single-threaded runtime
Expand Down Expand Up @@ -944,6 +945,57 @@ integTest('cdk diff --quiet does not print \'There were no differences\' message
expect(diff).not.toContain('There were no differences');
}));

integTest('cdk diff picks up changes that are only present in changeset', withDefaultFixture(async (fixture) => {
const ssmClient = new SSM.SSMClient();
await ssmClient.send(new SSM.PutParameterCommand(
{
Name: 'for-queue-name-defined-by-ssm-param',
Value: Math.floor(Math.random() * 100_000_000_000_001).toString(),
Overwrite: true,
Type: 'String',
},
));

// GIVEN
try {
await fixture.cdkDeploy('queue-name-defined-by-ssm-param');

// We want to change the ssm value. Then the CFN changeset will detect that the queue will be changed upon deploy.
const newRandomValueForSsmParam = Math.floor(Math.random() * 100_000_000_000_001).toString();
await ssmClient.send(new SSM.PutParameterCommand(
{
Name: 'for-queue-name-defined-by-ssm-param',
Value: newRandomValueForSsmParam,
Type: 'String',
Overwrite: true,
},
));

// WHEN
const diff = await fixture.cdk(['diff', fixture.fullStackName('queue-name-defined-by-ssm-param')]);
`
Resources
[~] AWS::SQS::Queue DiffFromChangeSetQueue DiffFromChangeSetQueue06622C07 replace
└─ [~] QueueName (requires replacement)
[~] AWS::SSM::Parameter DiffFromChangeSetSSMParam DiffFromChangeSetSSMParam92A9A723
└─ [~] Value
`;
bergjaak marked this conversation as resolved.
Show resolved Hide resolved

// THEN
// the reason these aren't just 1 line is because the terminal output includes colors, which comes up like \u001b[4m\u001b[1mResources\u001b
bergjaak marked this conversation as resolved.
Show resolved Hide resolved
// which is not very human friendly...
expect(diff).toContain('AWS::SQS::Queue');
expect(diff).toContain('DiffFromChangeSetQueue');
expect(diff).toContain('QueueName (requires replacement)');
expect(diff).toContain('AWS::SSM::Parameter');
expect(diff).toContain('DiffFromChangeSetSSMParam');
expect(diff).toContain('Value');
expect(diff).toContain('Number of stacks with differences: 1');
} finally {
await fixture.cdkDestroy('queue-name-defined-by-ssm-param');
}
}));

integTest('deploy stack with docker asset', withDefaultFixture(async (fixture) => {
await fixture.cdkDeploy('docker');
}));
Expand Down
36 changes: 27 additions & 9 deletions packages/@aws-cdk/cloudformation-diff/lib/diff-template.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export function fullDiff(
normalize(newTemplate);
const theDiff = diffTemplate(currentTemplate, newTemplate);
if (changeSet) {
filterFalsePositives(theDiff, changeSet);
refineDiffWithChangeSet(theDiff, changeSet, newTemplate.Resources);
addImportInformation(theDiff, changeSet);
} else if (isImport) {
makeAllResourceChangesImports(theDiff);
Expand Down Expand Up @@ -143,13 +143,6 @@ function calculateTemplateDiff(currentTemplate: { [key: string]: any }, newTempl
return new types.TemplateDiff(differences);
}

/**
* Compare two CloudFormation resources and return semantic differences between them
*/
export function diffResource(oldValue: types.Resource, newValue: types.Resource): types.ResourceDifference {
return impl.diffResource(oldValue, newValue);
}

Comment on lines -146 to -152
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was never used, so I removed it

/**
* Replace all references to the given logicalID on the given template, in-place
*
Expand Down Expand Up @@ -229,8 +222,33 @@ function makeAllResourceChangesImports(diff: types.TemplateDiff) {
});
}

function filterFalsePositives(diff: types.TemplateDiff, changeSet: DescribeChangeSetOutput) {
function refineDiffWithChangeSet(diff: types.TemplateDiff, changeSet: DescribeChangeSetOutput, newTemplateResources: {[logicalId: string]: any}) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of breaking this down into two helper function calls? This is a pretty long function now and it will be a clearer for future contributors if refineDiffWithChangeSet reads like:

function refineDiffWithChangeSet(diff: types.TemplateDiff, ...) {
  addMissingProperties(diff, changeSet);
  refineChangeImpact(diff, changeSet);
}

(the parameters will probably be different than what I've put here)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do that if that's the style we prefer, but I'm not of the opinion that smaller functions lead to more readable code. If the code will never be executed in more than one location, then I think having the code inlined makes it more readable, since there is less indirection. For example, John Carmack wrote

To sum up:

If a function is only called from a single place, consider inlining it.

If a function is called from multiple places, see if it is possible to arrange for the work to be done in a single place, perhaps with flags, and inline that. 

(http://number-none.com/blow/john_carmack_on_inlined_code.html?utm_source=substack&utm_medium=email)

Does that make sense? Do you agree or disagree? I do prefer having the single function with comments that explain what each code block is doing, as I think that's more readable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, I think that's a great article ^. A book that endorses this opinion and that explicitly disagrees with Uncle Bob clean code style (which argues for functions no longer than 4-6 lines) is A Philosophy of Software Design

Copy link
Contributor

@comcalvi comcalvi May 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an interesting perspective. That post is generally agreeable; in particular, this:

if a lot of operations are supposed to happen in a sequential fashion, their code should follow sequentially.

The rest of the post makes a solid argument that one function called in 20 places is more likely to lead to bugs that are hard to fix than if you just inlined some code in that one spot you need it. That seems to be the general, overarching theme of the whole post; a function written in one place, with one set of assumptions, can do subtle yet catastrophic damage when called in a place with different assumptions.

This is fair. For the code here, you could argue that it's very unclear if addMissingProperties should be called before or after refineChangeImpact, since the names give no indication of the correct order. This is a bad contract, since if you call it in the wrong order, it might crash or give you wrong output.

The only thing I'm not convinced of is this:

Using large comment blocks inside the major function to delimit the minor functions is a good idea for quick scanning

There's two sides of this for me:

  1. Comments explaining what the code is doing encourage poor variable and function naming.

This is from my personal experience, so I'm curious what your perspective is on this. I've reviewed code that uses very obscure variable names, but had a brief comment above explaining what was really happening. This isn't about functions or inlining anymore; this is just a short section of code that made an API call and did something with the response. Without the comment, there would be no way of knowing what that code was supposed to do because the variable names chosen communicated what the property was in AWS, but not at all what it was being used for.

  1. Comments get stale. Code that communicates clear intent is far faster to read, understand, and fix a bug in than code that communicates no intent, but even undocumented, uncommented code is easier to understand than code with wrong comments. CDK CLI code changes frequently, with little small parts modified here and there. It's easy for a comment at the top of a long code block to become invalid farther down. This is why I always tend to prefer breaking things up into functions.

Changing gears a bit, I think the clearest way to organize functions is based on their input and output types. For this function, we need changesets and the diff the entire time, and there's no clear way to break this by type.

For me, a quick glance through both halves of this function is not very readable, but this was true both before and after this PR; it's a consequence of mutating the diff object in this way.

I see the value of keeping it all in one function. I can see one other way that combines both worlds a bit, which we sometimes use in the CDK; create some local-scoped functions that only exist within this function. This means that they can't be called anywhere else, but we can still call them in here to indicate the two halves of this function at a glance.

Either way (leaving it as you have it, or creating the local helper functions) is fine with me, I'll leave that up to you.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for 1 Comments explaining what the code is doing encourage poor variable and function naming. I think a team should hold a high standard of not allowing poor naming, or encouraging clarifying and consistent names when possible. I think having comments is a bad excuse for having poor names, especially because of your second point, that Comments get stale. I do agree with that; I think that's also a team culture thing, namely that holding PRs accountable for updating comments is important.

It also makes sense to have a centralized location for comments/documentation, so that you don't have multiple "comment states" to synchronize -- so ideally comments on implementation go right above what's being commented on or on a function/class signature that describes high level (say, business logic) details that are unlikely to change. Also, comments shouldn't explain implementation details but rather business logic that cannot be discerned from the code -- information that can be captured in code ought to be captured in the code itself, since code has lower odds of going stale. And I do think comment readers should always keep in mind the possibility that a comment they're reading is stale. Nonetheless, I am of the opinion that comments can communicate context that otherwise cannot be captured in code, and its precisely that information (cannot be captured in code) that should be commented.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up doing the private methods suggestion, as I think that makes both of us happy :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, comments shouldn't explain implementation details but rather business logic that cannot be discerned from the code

I agree with this sentiment, which runs a bit counter to the idea of comment blocks explaining what is happening; but I also see the danger of exposing too many functions that can make non-obvious state mutations.

const replacements = findResourceReplacements(changeSet);

// Add resources and properties that aren't in the template diff but that ARE in the changeset diff
const resourceDiffLogicalIds = diff.resources.logicalIds;
for (const logicalId of Object.keys(replacements)) {
if (!(resourceDiffLogicalIds.includes(logicalId))) {
const noChangeResourceDiff = impl.diffResource(newTemplateResources[logicalId], newTemplateResources[logicalId]);
diff.resources.add(logicalId, noChangeResourceDiff);
}

for (const propertyName of Object.keys(replacements[logicalId].propertiesReplaced)) {
if (propertyName in diff.resources.get(logicalId).propertyUpdates) {
// If the property is already marked to be updated, then we don't need to do anything.
continue;
}

const newProp = new types.PropertyDifference(
// these fields will be decided below
{}, {}, { changeImpact: undefined },
);
newProp.isDifferent = true;
diff.resources.get(logicalId).setPropertyChange(propertyName, newProp);
}
};

// Now use the changeset diff to make property changeImpact more accurate.
diff.resources.forEachDifference((logicalId: string, change: types.ResourceDifference) => {
if (change.resourceType.includes('AWS::Serverless')) {
// CFN applies the SAM transform before creating the changeset, so the changeset contains no information about SAM resources
Expand Down
4 changes: 4 additions & 0 deletions packages/@aws-cdk/cloudformation-diff/lib/diff/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,10 @@ export class PropertyDifference<ValueType> extends Difference<ValueType> {
export class DifferenceCollection<V, T extends IDifference<V>> {
constructor(private readonly diffs: { [logicalId: string]: T }) {}

public add(logicalId: string, diff: T): void {
this.diffs[logicalId] = diff;
}

public get changes(): { [logicalId: string]: T } {
return onlyChanges(this.diffs);
}
Expand Down
Loading
Loading