Flink Checkpoint Process: Source Code Analysis

This article takes a deep dive into Flink's checkpoint process, from the CheckpointConfig settings to the generation of the ExecutionGraph. It explains in detail how the CheckpointCoordinator works, including how checkpoints are triggered and acknowledged and what role the TaskManager plays. The analysis is based on Flink 1.10 and looks at the core of Flink's fault-tolerance mechanism: the implementation of distributed snapshots.


The core of Flink's fault-tolerance mechanism is taking continuous distributed snapshots of the data stream. When the system fails, each operator can recover its pre-failure state from the checkpoint formed by these snapshots, guaranteeing that the final result of the job is affected by each message in the stream exactly once (exactly-once); this can be relaxed to at-least-once via configuration. The mechanism for producing the distributed snapshots is described in detail in the paper "Lightweight Asynchronous Snapshots for Distributed Dataflows". It is inspired by the Chandy-Lamport algorithm and adapted to Flink's execution model.
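
For reference, the checkpointing mode is chosen when checkpointing is enabled through the DataStream API. The snippet below is a minimal sketch, assuming the standard StreamExecutionEnvironment API; the 60-second interval is only an illustrative value:

// Example: choosing the checkpointing mode (minimal sketch, interval value is illustrative)
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointingModeExample {
	public static void main(String[] args) {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// exactly-once (the default) keeps the barrier-alignment semantics
		env.enableCheckpointing(60_000L, CheckpointingMode.EXACTLY_ONCE);

		// or relax to at-least-once, which does not block on barrier alignment
		// env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.AT_LEAST_ONCE);
	}
}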

How the snapshot state itself is persisted was covered in the earlier article "Flink 如何保存状态数据" (How Flink Stores State Data). This article focuses on how Flink performs the distributed snapshots.

The code in this article is based on Flink 1.10.

1. CheckpointConfig

In Flink 1.9 and earlier, checkpointing could only be configured through the API. Starting with Flink 1.10, it can also be configured in conf/flink-conf.yaml or via the -yD/-D command-line options.
CheckpointConfig gained a configure(ReadableConfig configuration) method, which is called when the StreamExecutionEnvironment is initialized.

// StreamExecutionEnvironment.java
public void configure(ReadableConfig configuration, ClassLoader classLoader) {
		...
		checkpointCfg.configure(configuration);
	}
// CheckpointConfig.java
	public void configure(ReadableConfig configuration) {
		configuration.getOptional(ExecutionCheckpointingOptions.CHECKPOINTING_MODE)
			.ifPresent(this::setCheckpointingMode);
		configuration.getOptional(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL)
			.ifPresent(i -> this.setCheckpointInterval(i.toMillis()));
		configuration.getOptional(ExecutionCheckpointingOptions.CHECKPOINTING_TIMEOUT)
			.ifPresent(t -> this.setCheckpointTimeout(t.toMillis()));
		configuration.getOptional(ExecutionCheckpointingOptions.MAX_CONCURRENT_CHECKPOINTS)
			.ifPresent(this::setMaxConcurrentCheckpoints);
		configuration.getOptional(ExecutionCheckpointingOptions.MIN_PAUSE_BETWEEN_CHECKPOINTS)
			.ifPresent(m -> this.setMinPauseBetweenCheckpoints(m.toMillis()));
		configuration.getOptional(ExecutionCheckpointingOptions.PREFER_CHECKPOINT_FOR_RECOVERY)
			.ifPresent(this::setPreferCheckpointForRecovery);
		configuration.getOptional(ExecutionCheckpointingOptions.TOLERABLE_FAILURE_NUMBER)
			.ifPresent(this::setTolerableCheckpointFailureNumber);
		configuration.getOptional(ExecutionCheckpointingOptions.EXTERNALIZED_CHECKPOINT)
			.ifPresent(this::enableExternalizedCheckpoints);
	}
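
With this in place, the same options can be supplied as configuration values instead of API calls (in flink-conf.yaml they live under the execution.checkpointing.* prefix). The snippet below is a minimal sketch with illustrative values, building a Configuration and handing it to the configure() method shown above:

// Example: driving CheckpointConfig from a Configuration (minimal sketch, values are illustrative)
import java.time.Duration;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ConfigureCheckpointingExample {
	public static void main(String[] args) {
		Configuration conf = new Configuration();
		conf.set(ExecutionCheckpointingOptions.CHECKPOINTING_MODE, CheckpointingMode.EXACTLY_ONCE);
		conf.set(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL, Duration.ofSeconds(30));
		conf.set(ExecutionCheckpointingOptions.CHECKPOINTING_TIMEOUT, Duration.ofMinutes(10));

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		// StreamExecutionEnvironment.configure() forwards these options to CheckpointConfig.configure()
		env.configure(conf, ConfigureCheckpointingExample.class.getClassLoader());
	}
}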

2. Generating the StreamGraph

When StreamGraphGenerator builds the StreamGraph, the CheckpointConfig is passed straight through to the StreamGraph:

// StreamGraphGenerator.java
public StreamGraph generate() {
		streamGraph = new StreamGraph(executionConfig, checkpointConfig, savepointRestoreSettings);
		...

		return builtStreamGraph;
	}

3. Generating the JobGraph

When the StreamGraph is converted into a JobGraph, three kinds of checkpoint-related vertices are defined:

  • triggerVertices: vertices that receive the "trigger checkpoint" message. When the CheckpointCoordinator later initiates a checkpoint, only these vertices are asked to trigger it. Only source vertices become triggerVertices.
  • ackVertices: vertices that must send an ack message to the CheckpointCoordinator once their snapshot is complete. All vertices are ackVertices.
  • commitVertices: vertices that receive the CheckpointCoordinator's "notifyCheckpointComplete" message once the checkpoint has completed. All vertices are commitVertices.

In addition, a CheckpointCoordinatorConfiguration is created here; it is used later when the CheckpointCoordinator is initialized.

// StreamingJobGraphGenerator.java
private void configureCheckpointing() {
		CheckpointConfig cfg = streamGraph.getCheckpointConfig();

		long interval = cfg.getCheckpointInterval();
		if (interval < MINIMAL_CHECKPOINT_TIME) {
			// interval of max value means disable periodic checkpoint
			interval = Long.MAX_VALUE;
		}

		//  --- configure the participating vertices ---

		// collect the vertices that receive "trigger checkpoint" messages.
		// currently, these are all the sources
		List<JobVertexID> triggerVertices = new ArrayList<>();

		// collect the vertices that need to acknowledge the checkpoint
		// currently, these are all vertices
		List<JobVertexID> ackVertices = new ArrayList<>(jobVertices.size());

		// collect the vertices that receive "commit checkpoint" messages
		// currently, these are all vertices
		List<JobVertexID> commitVertices = new ArrayList<>(jobVertices.size());

		for (JobVertex vertex : jobVertices.values()) {
			if (vertex.isInputVertex()) {
				triggerVertices.add(vertex.getID());
			}
			commitVertices.add(vertex.getID());
			ackVertices.add(vertex.getID());
		}

		//  --- configure options ---

		CheckpointRetentionPolicy retentionAfterTermination;
		if (cfg.isExternalizedCheckpointsEnabled()) {
			CheckpointConfig.ExternalizedCheckpointCleanup cleanup = cfg.getExternalizedCheckpointCleanup();
			// Sanity check
			if (cleanup == null) {
				throw new IllegalStateException("Externalized checkpoints enabled, but no cleanup mode configured.");
			}
			retentionAfterTermination = cleanup.deleteOnCancellation() ?
					CheckpointRetentionPolicy.RETAIN_ON_FAILURE :
					CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION;
		} else {
			retentionAfterTermination = CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION;
		}

		CheckpointingMode mode = cfg.getCheckpointingMode();

		boolean isExactlyOnce;
		if (mode == CheckpointingMode.EXACTLY_ONCE) {
			isExactlyOnce = cfg.isCheckpointingEnabled();
		} else if (mode == CheckpointingMode.AT_LEAST_ONCE) {
			isExactlyOnce = false;
		} else {
			throw new IllegalStateException("Unexpected checkpointing mode. " +
				"Did not expect there to be another checkpointing mode besides " +
				"exactly-once or at-least-once.");
		}

		//  --- configure the master-side checkpoint hooks ---

		final ArrayList<MasterTriggerRestoreHook.Factory> hooks = new ArrayList<>();

		for (StreamNode node : streamGraph.getStreamNodes()) {
			if (node.getOperatorFactory() instanceof UdfStreamOperatorFactory) {
				Function f = ((UdfStreamOperatorFactory) node.getOperatorFactory()).getUserFunction();

				if (f instanceof WithMasterCheckpointHook) {
					hooks.add(new FunctionMasterCheckpointHookFactory((WithMasterCheckpointHook<?>) f));
				}
			}
		}

		// because the hooks can have user-defined code, they need to be stored as
		// eagerly serialized values
		final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks;
		if (hooks.isEmpty()) {
			serializedHooks = null;
		} else {
			try {
				MasterTriggerRestoreHook.Factory[] asArray =
						hooks.toArray(new MasterTriggerRestoreHook.Factory[hooks.size()]);
				serializedHooks = new SerializedValue<>(asArray);
			}
			catch (IOException e) {
				throw new FlinkRuntimeException("Trigger/restore hook is not serializable", e);
			}
		}

		// because the state backend can have user-defined code, it needs to be stored as
		// eagerly serialized value
		final SerializedValue<StateBackend> serializedStateBackend;
		if (streamGraph.getStateBackend() == null) {
			serializedStateBackend = null;
		} else {
			try {
				serializedStateBackend =
					new SerializedValue<StateBackend>(streamGraph.getStateBackend());
			}
			catch (IOException e) {
				throw new FlinkRuntimeException("State backend is not serializable", e);
			}
		}

		//  --- done, put it all together ---

		JobCheckpointingSettings settings = new JobCheckpointingSettings(
			triggerVertices,
			ackVertices,
			commitVertices,
			new CheckpointCoordinatorConfiguration(
				interval,
				cfg.getCheckpointTimeout(),
				cfg.getMinPauseBetweenCheckpoints(),
				cfg.getMaxConcurrentCheckpoints(),
				retentionAfterTermination,
				isExactlyOnce,
				cfg.isPreferCheckpointForRecovery(),
				cfg.getTolerableCheckpointFailureNumber()),
			serializedStateBackend,
			serializedHooks);

		jobGraph.setSnapshotSettings(settings);
	}
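
For context, the retention policy derived above is driven by the externalized-checkpoint cleanup mode chosen by the user. The following is a minimal usage sketch; the cleanup mode and interval are just example choices:

// Example: user-facing call that the retention policy is derived from (minimal sketch)
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RetentionPolicyExample {
	public static void main(String[] args) {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.enableCheckpointing(60_000L);

		// RETAIN_ON_CANCELLATION -> deleteOnCancellation() == false
		// -> CheckpointRetentionPolicy.RETAIN_ON_CANCELLATION in configureCheckpointing()
		env.getCheckpointConfig().enableExternalizedCheckpoints(
			CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
	}
}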

4. Generating the ExecutionGraph

When the ExecutionGraph is built, ExecutionGraphBuilder reads the JobCheckpointingSettings from the JobGraph and calls ExecutionGraph.enableCheckpointing(), which sets up the CheckpointFailureManager and the CheckpointCoordinator:

// ExecutionGraph.java
public void enableCheckpointing(
			CheckpointCoordinatorConfiguration chkConfig,
			List<ExecutionJobVertex> verticesToTrigger,
			List<ExecutionJobVertex> verticesToWaitFor,
			List<ExecutionJobVertex> verticesToCommitTo,
			List<MasterTriggerRestoreHook<?>> masterHooks,
			CheckpointIDCounter checkpointIDCounter,
			CompletedCheckpointStore checkpointStore,
			StateBackend checkpointStateBackend,
			CheckpointStatsTracker statsTracker) {
		checkState(state == JobStatus.CREATED, "Job must be in CREATED state");
		checkState(checkpointCoordinator == null, "checkpointing already enabled");

		ExecutionVertex[] tasksToTrigger = collectExecutionVertices(verticesToTrigger);
		ExecutionVertex[] tasksToWaitFor = collectExecutionVertices(verticesToWaitFor);
		ExecutionVertex[] tasksToCommitTo = collectExecutionVertices(verticesToCommitTo);

		checkpointStatsTracker = checkNotNull(statsTracker, "CheckpointStatsTracker");

		CheckpointFailureManager failureManager = new CheckpointFailureManager(
			chkConfig.getTolerableCheckpointFailureNumber(),
			new CheckpointFailureManager.FailJobCallback() {
				@Override
				public void failJob(Throwable cause) {
					getJobMasterMainThreadExecutor().execute(() -> failGlobal(cause));
				}
				...
			});

		// create the coordinator that triggers and commits checkpoints and holds the state
		checkpointCoordinator = new CheckpointCoordinator(
			jobInformation.getJobId(),
			chkConfig,
			tasksToTrigger,
			tasksToWaitFor,
			tasksToCommitTo,
			...);

		...

		// an interval of Long.MAX_VALUE means periodic checkpointing is disabled
		if (chkConfig.getCheckpointInterval() != Long.MAX_VALUE) {
			// the periodic checkpoint scheduler is activated and deactivated as a result of
			// job status changes (running -> on, all other states -> off)
			registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());
		}
	}
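
The activator/deactivator registered at the end of enableCheckpointing() is a small JobStatusListener that turns the periodic checkpoint scheduler on and off as the job changes state. Below is a simplified sketch modeled on Flink's CheckpointCoordinatorDeActivator; treat the imports, the callback signature, and the scheduler method names as assumptions about the 1.10 code base rather than verbatim source:

// Simplified sketch of the activator/deactivator (not verbatim Flink source; signatures assumed)
import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinator;
import org.apache.flink.runtime.executiongraph.JobStatusListener;
import org.apache.flink.runtime.jobgraph.JobStatus;

public class CheckpointSchedulerDeActivator implements JobStatusListener {

	private final CheckpointCoordinator coordinator;

	public CheckpointSchedulerDeActivator(CheckpointCoordinator coordinator) {
		this.coordinator = coordinator;
	}

	@Override
	public void jobStatusChanges(JobID jobId, JobStatus newJobStatus, long timestamp, Throwable error) {
		if (newJobStatus == JobStatus.RUNNING) {
			// the job entered RUNNING: start triggering periodic checkpoints
			coordinator.startCheckpointScheduler();
		} else {
			// any other state: stop the periodic trigger for now
			coordinator.stopCheckpointScheduler();
		}
	}
}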