Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[KYUUBI #5080][FLINK] Fix EmbeddedExecutorFactory not thread-safe during bootstrap #5082

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ public class EmbeddedExecutorFactory implements PipelineExecutorFactory {

private static ScheduledExecutor retryExecutor;

private static final Object bootstrapLock = new Object();

private static final long BOOTSTRAP_WAIT_INTERVAL = 10_000L;

private static final int BOOTSTRAP_WAIT_RETRIES = 3;

private static final Logger LOGGER = LoggerFactory.getLogger(EmbeddedExecutorFactory.class);

public EmbeddedExecutorFactory() {
Expand Down Expand Up @@ -79,13 +85,17 @@ public EmbeddedExecutorFactory(
checkState(EmbeddedExecutorFactory.submittedJobIds == null);
checkState(EmbeddedExecutorFactory.dispatcherGateway == null);
checkState(EmbeddedExecutorFactory.retryExecutor == null);
// submittedJobIds would be always 1, because we create a new list to avoid concurrent access
// issues
EmbeddedExecutorFactory.submittedJobIds =
new ConcurrentLinkedQueue<>(checkNotNull(submittedJobIds));
EmbeddedExecutorFactory.bootstrapJobIds = submittedJobIds;
EmbeddedExecutorFactory.dispatcherGateway = checkNotNull(dispatcherGateway);
EmbeddedExecutorFactory.retryExecutor = checkNotNull(retryExecutor);
synchronized (bootstrapLock) {
// submittedJobIds would be always 1, because we create a new list to avoid concurrent access
// issues
LOGGER.debug("Bootstrapping EmbeddedExecutorFactory.");
EmbeddedExecutorFactory.submittedJobIds =
new ConcurrentLinkedQueue<>(checkNotNull(submittedJobIds));
EmbeddedExecutorFactory.bootstrapJobIds = submittedJobIds;
EmbeddedExecutorFactory.dispatcherGateway = checkNotNull(dispatcherGateway);
EmbeddedExecutorFactory.retryExecutor = checkNotNull(retryExecutor);
bootstrapLock.notifyAll();
}
}

@Override
Expand All @@ -96,7 +106,7 @@ public String getName() {
/**
 * Decides whether this factory should handle the given configuration.
 *
 * <p>Overrides Flink's implementation to allow usage in Kyuubi: the factory matches when the
 * execution target equals "yarn-application" (ignoring case) and the "yarn.tags" entry of the
 * configuration contains "kyuubi" (ignoring case).
 *
 * @param configuration the configuration to inspect
 * @return {@code true} if both conditions hold; {@code false} otherwise, including when no
 *     execution target is set
 */
@Override
public boolean isCompatibleWith(final Configuration configuration) {
  final String target = configuration.get(DeploymentOptions.TARGET);
  LOGGER.debug("Matching execution target: {}", target);
  // Constant-first comparison: an unset target yields false instead of a NullPointerException.
  if (!"yarn-application".equalsIgnoreCase(target)) {
    return false;
  }
  final String yarnTags = configuration.toMap().getOrDefault("yarn.tags", "");
  // Locale.ROOT keeps the lower-casing locale-independent (e.g. "I" -> "i" even under tr_TR).
  return yarnTags.toLowerCase(java.util.Locale.ROOT).contains("kyuubi");
}
Expand All @@ -105,11 +115,30 @@ public boolean isCompatibleWith(final Configuration configuration) {
public PipelineExecutor getExecutor(final Configuration configuration) {
checkNotNull(configuration);
Collection<JobID> executorJobIDs;
synchronized (bootstrapLock) {
// wait in a loop to avoid spurious wakeups
int retry = 0;
while (bootstrapJobIds == null && retry < BOOTSTRAP_WAIT_RETRIES) {
try {
LOGGER.debug("Waiting for bootstrap to complete. Wait retries: {}.", retry);
bootstrapLock.wait(BOOTSTRAP_WAIT_INTERVAL);
retry++;
} catch (InterruptedException e) {
throw new RuntimeException("Interrupted while waiting for bootstrap.", e);
}
}
if (bootstrapJobIds == null) {
throw new RuntimeException(
"Bootstrap of Flink SQL engine timed out after "
+ BOOTSTRAP_WAIT_INTERVAL * BOOTSTRAP_WAIT_RETRIES
+ " ms. Please check the engine log for more details.");
}
}
if (bootstrapJobIds.size() > 0) {
LOGGER.info("Submitting new Kyuubi job. Job already submitted: {}.", submittedJobIds.size());
LOGGER.info("Submitting new Kyuubi job. Job submitted: {}.", submittedJobIds.size());
executorJobIDs = submittedJobIds;
} else {
LOGGER.info("Bootstrapping Flink SQL engine.");
LOGGER.info("Bootstrapping Flink SQL engine with the initial SQL.");
executorJobIDs = bootstrapJobIds;
}
return new EmbeddedExecutor(
Expand Down