一、整体流程
事务的目的是保证一批数据要么全部成功，要么全部失败。Flume事务分为Put事务和Take事务：Take事务发生在sink从channel取出一批数据时，如果sink处理失败，这一批数据会被放回channel；Put事务发生在source向channel写入一批数据时，如果出现异常，事务回滚，这批数据不会被ACK，因此通常需要source具备重放能力。
图一:
FileChannel主要是由WAL预写日志和内存队列FlumeEventQueue组成。
图二:
二、Transaction
/**
 * Contract for a transaction obtained from a channel.
 *
 * <p>The callers shown in this document use it in a fixed order: {@link #begin()}
 * before any event is put, {@link #commit()} once every put has succeeded,
 * {@link #rollback()} when anything is thrown in between, and {@link #close()}
 * in a {@code finally} block regardless of the outcome.
 */
public interface Transaction {

  /** Describes the state of a transaction. */
  enum TransactionState {
    Started,
    Committed,
    RolledBack,
    Closed
  }

  /** Starts the transaction; called before the batch of channel operations. */
  void begin();

  /** Commits the transaction; called after the whole batch was handled successfully. */
  void commit();

  /** Rolls the transaction back; called when handling the batch failed. */
  void rollback();

  /** Closes the transaction; called last, after either commit or rollback. */
  void close();
}
图三:
三、Put()
Put事务发生在source往channel写入数据时。
// Once the source has collected a batch of events, it hands the batch to its
// channel processor, which puts the events onto the channels (see processEventBatch).
getChannelProcessor().processEventBatch(events);
public void processEventBatch(List<Event> events) {
Preconditions.checkNotNull(events, "Event list must not be null");
events = interceptorChain.intercept(events);
//必需通道
Map<Channel, List<Event>> reqChannelQueue =
new LinkedHashMap<Channel, List<Event>>();
//可选通道：由channel selector为事件选出的非必需通道（见下方 selector.getOptionalChannels(event)），与 SinkGroup 无关；
//向可选通道写入失败时只回滚并记录日志（仅 Error 会继续抛出），不会像必需通道那样抛出 ChannelException。没有配置可选通道可忽略。
Map<Channel, List<Event>> optChannelQueue =
new LinkedHashMap<Channel, List<Event>>();
for (Event event : events) {
List<Channel> reqChannels = selector.getRequiredChannels(event);
for (Channel ch : reqChannels) {
List<Event> eventQueue = reqChannelQueue.get(ch);
if (eventQueue == null) {
eventQueue = new ArrayList<Event>();
reqChannelQueue.put(ch, eventQueue);
}
eventQueue.add(event);
}
List<Channel> optChannels = selector.getOptionalChannels(event);
for (Channel ch : optChannels) {
List<Event> eventQueue = optChannelQueue.get(ch);
if (eventQueue == null) {
eventQueue = new ArrayList<Event>();
optChannelQueue.put(ch, eventQueue);
}
eventQueue.add(event);
}
}
// Process required channels
for (Channel reqChannel : reqChannelQueue.keySet()) {
Transaction tx = reqChannel.getTransaction();
Preconditions.checkNotNull(tx, "Transaction object must not be null");
try {
tx.begin();
List<Event> batch = reqChannelQueue.get(reqChannel);
for (Event event : batch) {
//进入必选通道,调用put()
reqChannel.put(event);
}
tx.commit();
} catch (Throwable t) {
tx.rollback();
if (t instanceof Error) {
LOG.error("Error while writing to required channel: " + reqChannel, t);
throw (Error) t;
} else if (t instanceof ChannelException) {
throw (ChannelException) t;
} else {
throw new ChannelException("Unable to put batch on required " +
"channel: " + reqChannel, t);
}
} finally {
if (tx != null) {
tx.close();
}
}
}
// Process optional channels
for (Channel optChannel : optChannelQueue.keySet()) {
Transaction tx = optChannel.getTransaction();
Preconditions.checkNotNull(tx, "Transaction object must not be null");
try {
tx.begin();
List<Event> batch = optChannelQueue.get(optChannel);
for (Event event : batch) {
optChannel.put(event);
}
tx.commit();
} catch (Throwable t) {
tx.rollback();
LOG.error("Unable to put batch on optional channel: " + optChannel, t);
if (t instanceof Error) {
throw (Error) t;
}
} finally {
if (tx != null) {
tx.close