Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better design of the CodeAndLocationProvider #1414

Merged
merged 4 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation
import de.fraunhofer.aisec.cpg.sarif.Region
import java.io.File
import java.util.*
import org.apache.commons.lang3.StringUtils
import org.slf4j.LoggerFactory

/**
Expand All @@ -43,7 +42,7 @@ import org.slf4j.LoggerFactory
* after having processed the files, i.e., it won't be available in passes.
*
* More information can be found in the
* [github wiki page](https://github.com/Fraunhofer-AISEC/cpg/wiki/Language-Frontends).
* [GitHub wiki page](https://github.com/Fraunhofer-AISEC/cpg/wiki/Language-Frontends).
*/
abstract class LanguageFrontend<AstNode, TypeNode>(
/** The language this frontend works for. */
Expand Down Expand Up @@ -109,7 +108,7 @@ abstract class LanguageFrontend<AstNode, TypeNode>(
* @param astNode the ast node
* @return the source code
*/
abstract fun codeOf(astNode: AstNode): String?
abstract override fun codeOf(astNode: AstNode): String?

/**
* Returns the [Region] of the code with line and column, index starting at 1, generic for java
Expand All @@ -119,136 +118,7 @@ abstract class LanguageFrontend<AstNode, TypeNode>(
* @param astNode the ast node
* @return the location
*/
abstract fun locationOf(astNode: AstNode): PhysicalLocation?

/**
 * Copies the raw code and the location of [astNode] onto [cpgNode].
 *
 * The code is only transferred when `config.codeInNodes` is enabled; if the frontend cannot
 * provide any code for the raw node, a warning is logged and the node's code is left untouched.
 * The location is always overwritten with the result of [locationOf], which may be `null`.
 *
 * @param cpgNode the graph node that receives code and location
 * @param astNode the language-specific raw node the information is taken from
 */
override fun setCodeAndLocation(cpgNode: Node, astNode: AstNode) {
    if (config.codeInNodes) {
        when (val rawCode = codeOf(astNode)) {
            null -> log.warn("Unexpected: No code for node {}", astNode)
            else -> cpgNode.code = rawCode
        }
    }

    // The location is assigned regardless of the code setting.
    cpgNode.location = locationOf(astNode)
}

/**
 * Determines the newline sequence used in the code of [node].
 *
 * This is a convenience overload that forwards the node's code (or an empty string if the node
 * has no code) together with the region of its location, if any, to the string-based variant.
 *
 * @param node the node whose code is inspected
 * @return the newline sequence as a String
 */
fun getNewLineType(node: Node): String = getNewLineType(node.code ?: "", node.location?.region)

/**
 * Determines the newline sequence used in [multilineCode] to avoid issues with different newline
 * types and formatting.
 *
 * The function looks at the end of the inspected text. If a [region] spanning more than one line
 * is supplied, the last `endColumn - 1` characters are cut off first, so that the inspected text
 * is expected to end with the line break that precedes the last line.
 *
 * @param multilineCode the code the newline type is extracted from, assuming it contains newlines
 * @param region optional region of [multilineCode]; only used when it spans several lines
 * @return the detected newline sequence, or `\n` as default if none could be determined
 */
fun getNewLineType(multilineCode: String, region: Region? = null): String {
    val inspected =
        if (region != null && region.startLine != region.endLine) {
            multilineCode.substring(0, multilineCode.length - region.endColumn + 1)
        } else {
            multilineCode
        }

    // Two-character sequences are tried first so that "\r\n" is not reported as "\n".
    val detected = listOf("\n\r", "\r\n", "\n").firstOrNull { inspected.endsWith(it) }
    if (detected != null) {
        return detected
    }

    log.debug("Could not determine newline type. Assuming \\n.")
    return "\n"
}

/**
 * Extracts the part of the code of [node] that is covered by [subRegion].
 *
 * This overload reads the code from the node and delegates to the string-based variant. If the
 * node carries no code, an empty string is returned.
 *
 * @param node the parent node whose code contains the subregion
 * @param nodeRegion the precomputed region of [node]
 * @param subRegion the precomputed region to extract
 * @return the code of the subregion, or an empty string if [node] has no code
 */
fun getCodeOfSubregion(node: Node, nodeRegion: Region, subRegion: Region): String =
    node.code?.let { parentCode -> getCodeOfSubregion(parentCode, nodeRegion, subRegion) } ?: ""

/**
 * Extracts the part of [code] that is covered by [subRegion].
 *
 * [code] is expected to be the text located at [nodeRegion]; line and column values of both
 * regions are 1-based. Positions on the first line of [nodeRegion] are translated relative to its
 * start column. Positions on later lines are located by searching for the n-th line break in
 * [code] and then advancing past the complete newline sequence, so that two-character newlines
 * such as `\r\n` no longer shift the result by one character.
 *
 * @param code the code of the enclosing node
 * @param nodeRegion the region that [code] occupies
 * @param subRegion the region to extract; must lie within [nodeRegion]
 * @return the code of the subregion
 */
fun getCodeOfSubregion(code: String, nodeRegion: Region, subRegion: Region): String {
    val nlType = getNewLineType(code, nodeRegion)

    // Translates a (line, column) position into an index into `code`.
    fun offsetOf(line: Int, column: Int): Int {
        val lineDelta = line - nodeRegion.startLine
        return if (lineDelta == 0) {
            column - nodeRegion.startColumn
        } else {
            // ordinalIndexOf yields the index of the FIRST character of the newline sequence.
            // The target line starts nlType.length characters later; column is 1-based.
            // NOTE(review): a result of -1 (line break not found) is not handled here, which
            // matches the previous behavior.
            StringUtils.ordinalIndexOf(code, nlType, lineDelta) + nlType.length + column - 1
        }
    }

    val start = offsetOf(subRegion.startLine, subRegion.startColumn)
    val end = offsetOf(subRegion.endLine, subRegion.endColumn)
    return code.substring(start, end)
}

/**
 * Merges two regions. The new region contains both and is the minimal region to do so.
 *
 * The start is taken from whichever region begins first (comparing line, then column) and the
 * end from whichever region ends last. On a tie, the values of [regionTwo] are used, which are
 * identical in that case.
 *
 * @param regionOne the first region
 * @param regionTwo the second region
 * @return a new region spanning both inputs
 */
fun mergeRegions(regionOne: Region, regionTwo: Region): Region {
    val oneStartsFirst =
        regionOne.startLine < regionTwo.startLine ||
            (regionOne.startLine == regionTwo.startLine &&
                regionOne.startColumn < regionTwo.startColumn)
    val oneEndsLast =
        regionOne.endLine > regionTwo.endLine ||
            (regionOne.endLine == regionTwo.endLine &&
                regionOne.endColumn > regionTwo.endColumn)

    val earliest = if (oneStartsFirst) regionOne else regionTwo
    val latest = if (oneEndsLast) regionOne else regionTwo

    val ret = Region()
    ret.startLine = earliest.startLine
    ret.startColumn = earliest.startColumn
    ret.endLine = latest.endLine
    // Fixed: the end column was previously copied from regionOne.startColumn.
    ret.endColumn = latest.endColumn
    return ret
}
abstract override fun locationOf(astNode: AstNode): PhysicalLocation?

open fun cleanup() {
clearProcessed()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ import de.fraunhofer.aisec.cpg.graph.NodeBuilder.log
import de.fraunhofer.aisec.cpg.graph.scopes.Scope
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.types.*
import de.fraunhofer.aisec.cpg.helpers.getCodeOfSubregion
import de.fraunhofer.aisec.cpg.passes.inference.IsImplicitProvider
import de.fraunhofer.aisec.cpg.passes.inference.IsInferredProvider
import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation
import de.fraunhofer.aisec.cpg.sarif.Region
import java.net.URI
import org.slf4j.LoggerFactory

object NodeBuilder {
Expand All @@ -63,10 +65,14 @@ interface LanguageProvider : MetadataProvider {

/**
* This interface denotes that the class is able to provide source code and location information for
* a specific node and set it using the [setCodeAndLocation] function.
* a specific node.
*/
interface CodeAndLocationProvider<in AstNode> : MetadataProvider {
fun setCodeAndLocation(cpgNode: Node, astNode: AstNode)
/** Returns the raw code of the supplied [AstNode]. */
fun codeOf(astNode: AstNode): String?

/** Returns the [PhysicalLocation] of the supplied [AstNode]. */
fun locationOf(astNode: AstNode): PhysicalLocation?
}

/**
Expand Down Expand Up @@ -102,6 +108,14 @@ fun Node.applyMetadata(
localNameOnly: Boolean = false,
defaultNamespace: Name? = null,
) {
// We definitely need a context provider, because otherwise we cannot set the context and the
// node cannot access necessary information about the current translation context it lives in.
this.ctx =
(provider as? ContextProvider)?.ctx
?: throw TranslationException(
"Trying to create a node without a ContextProvider. This will fail."
)

// We try to set the code and especially the location as soon as possible because the hashCode
// implementation of the Node class relies on it. Otherwise, we could have a problem that the
// location is not yet set, but the node is put into a hashmap. In this case the hashCode is
Expand All @@ -110,7 +124,7 @@ fun Node.applyMetadata(
// calling a node builder from these should already set the location.
if (provider is CodeAndLocationProvider<*> && rawNode != null) {
@Suppress("UNCHECKED_CAST")
(provider as CodeAndLocationProvider<Any>).setCodeAndLocation(this, rawNode)
setCodeAndLocation(provider as CodeAndLocationProvider<Any>, rawNode)
}

if (provider is LanguageProvider) {
Expand All @@ -134,16 +148,6 @@ fun Node.applyMetadata(
)
}

if (provider is ContextProvider) {
this.ctx = provider.ctx
}

if (this.ctx == null) {
throw TranslationException(
"Trying to create a node without a ContextProvider. This will fail."
)
}

if (name != null) {
val namespace =
if (provider is NamespaceProvider) {
Expand Down Expand Up @@ -266,9 +270,19 @@ fun <T : Node> T.codeAndLocationFrom(other: Node): T {
return this
}

fun <T : Node, S> T.codeAndLocationFrom(frontend: LanguageFrontend<S, *>, rawNode: S): T {
frontend.setCodeAndLocation(this, rawNode)
/**
* Sometimes we need to explicitly (re)set the code and location of a node to another raw node than
* originally used in the node builder. A common use-case for that is languages that contain
* expression statements, which we simplify to simple expressions. But in these languages, the
* expression often does not contain a semicolon at the end, whereas the statement does. In this
* case we want to preserve the original code containing the semicolon and need to set the node's
* code/location to the statement rather than the expression, after it comes back from the
* expression handler.
*/
context(CodeAndLocationProvider<AstNode>)

fun <T : Node, AstNode> T.codeAndLocationFromOtherRawNode(rawNode: AstNode): T {
setCodeAndLocation(this@CodeAndLocationProvider, rawNode)
return this
}

Expand All @@ -282,13 +296,11 @@ fun <T : Node, S> T.codeAndLocationFrom(frontend: LanguageFrontend<S, *>, rawNod
* code is extracted from the parent node to catch separators and auxiliary syntactic elements that
* are between the child nodes.
*
* @param frontend Used to invoke language specific code and location generation
* @param parentNode Used to extract the code for this node
*/
fun <T : Node, S> T.codeAndLocationFromChildren(
frontend: LanguageFrontend<S, *>,
parentNode: S
): T {
context(CodeAndLocationProvider<AstNode>)

fun <T : Node, AstNode> T.codeAndLocationFromChildren(parentNode: AstNode): T {
var first: Node? = null
var last: Node? = null

Expand All @@ -297,7 +309,7 @@ fun <T : Node, S> T.codeAndLocationFromChildren(
val worklist: MutableList<Node> = this.astChildren.toMutableList()
while (worklist.isNotEmpty()) {
val current = worklist.removeFirst()
if (current.location?.region == null || current.location?.region == Region()) {
if (current.location == null || current.location?.region == Region()) {
// If the node has no location we use the same search on its children again
worklist.addAll(current.astChildren)
} else {
Expand Down Expand Up @@ -337,15 +349,36 @@ fun <T : Node, S> T.codeAndLocationFromChildren(
endLine = last.location?.region?.endLine ?: -1,
endColumn = last.location?.region?.endColumn ?: -1,
)
this.location?.region = newRegion
this.location =
PhysicalLocation(first.location?.artifactLocation?.uri ?: URI(""), newRegion)

val parentCode = frontend.codeOf(parentNode)
val parentRegion = frontend.locationOf(parentNode)?.region
val parentCode = this@CodeAndLocationProvider.codeOf(parentNode)
val parentRegion = this@CodeAndLocationProvider.locationOf(parentNode)?.region
if (parentCode != null && parentRegion != null) {
// If the parent has code and region the new region is used to extract the code
this.code = frontend.getCodeOfSubregion(parentCode, parentRegion, newRegion)
this.code = getCodeOfSubregion(parentCode, parentRegion, newRegion)
}
}

return this
}

/**
 * This internal function sets the code and location of this node according to the supplied
 * [CodeAndLocationProvider].
 *
 * The code is only queried and assigned if the node's translation context has `codeInNodes`
 * enabled in its config; if the provider returns no code, a warning is logged and the current
 * code is kept. The location is always replaced by the provider's location for [rawNode], which
 * may be `null`.
 *
 * @param provider the provider that knows how to extract code and location from [rawNode]
 * @param rawNode the language-specific raw node
 */
private fun <AstNode> Node.setCodeAndLocation(
    provider: CodeAndLocationProvider<AstNode>,
    rawNode: AstNode
) {
    val codeEnabled = this.ctx?.config?.codeInNodes == true
    if (codeEnabled) {
        when (val rawCode = provider.codeOf(rawNode)) {
            null -> LOGGER.warn("Unexpected: No code for node {}", rawNode)
            else -> this.code = rawCode
        }
    }

    // The location is assigned independently of the code setting.
    this.location = provider.locationOf(rawNode)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.helpers

import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.sarif.Region
import org.apache.commons.lang3.StringUtils

/**
 * Determines the newline sequence used in [multilineCode] to prevent issues with different
 * newline types and formatting.
 *
 * The function looks at the end of the inspected text. If a [region] spanning more than one line
 * is supplied, the last `endColumn - 1` characters are cut off first, so that the inspected text
 * is expected to end with the line break that precedes the last line.
 *
 * @param multilineCode The newline type is extracted from the code assuming it contains newlines
 * @param region optional region of [multilineCode]; only used when it spans several lines
 * @return the String of the newline or \n as default
 */
fun getNewLineType(multilineCode: String, region: Region? = null): String {
    val inspected =
        if (region != null && region.startLine != region.endLine) {
            multilineCode.substring(0, multilineCode.length - region.endColumn + 1)
        } else {
            multilineCode
        }

    // Two-character sequences are tried first so that "\r\n" is not reported as "\n".
    val detected = listOf("\n\r", "\r\n", "\n").firstOrNull { inspected.endsWith(it) }
    if (detected != null) {
        return detected
    }

    LanguageFrontend.log.debug("Could not determine newline type. Assuming \\n.")
    return "\n"
}

/**
 * Returns the part of [code] that is covered by [subRegion].
 *
 * [code] is expected to be the text located at [nodeRegion]; line and column values of both
 * regions are 1-based. Positions on the first line of [nodeRegion] are translated relative to its
 * start column. Positions on later lines are located by searching for the n-th line break in
 * [code] and then advancing past the complete newline sequence, so that two-character newlines
 * such as `\r\n` no longer shift the result by one character.
 *
 * @param code the code of the enclosing node
 * @param nodeRegion the region that [code] occupies
 * @param subRegion the region to extract; must lie within [nodeRegion]
 * @return the code of the subregion
 */
fun getCodeOfSubregion(code: String, nodeRegion: Region, subRegion: Region): String {
    val nlType = getNewLineType(code, nodeRegion)

    // Translates a (line, column) position into an index into `code`.
    fun offsetOf(line: Int, column: Int): Int {
        val lineDelta = line - nodeRegion.startLine
        return if (lineDelta == 0) {
            column - nodeRegion.startColumn
        } else {
            // ordinalIndexOf yields the index of the FIRST character of the newline sequence.
            // The target line starts nlType.length characters later; column is 1-based.
            // NOTE(review): a result of -1 (line break not found) is not handled here, which
            // matches the previous behavior.
            StringUtils.ordinalIndexOf(code, nlType, lineDelta) + nlType.length + column - 1
        }
    }

    val start = offsetOf(subRegion.startLine, subRegion.startColumn)
    val end = offsetOf(subRegion.endLine, subRegion.endColumn)
    return code.substring(start, end)
}
Loading
Loading