MANCHESTER LONDON NEW YORK
Martin Zapletal @zapletal_martin
#ScalaDays
Data in Motion: Streaming Static Data Efficiently
in Akka Persistence (and elsewhere)
@cakesolutions
Data at scale
● Reactive
● Real time, asynchronous and message driven
● Elastic and scalable
● Resilient and fault tolerant
Streams

Akka Persistence
[Diagram: an ordered stream of events (1-4) for persistence_id1 flowing into the journal.]
class AccountActor(protected[this] val passivationTimeout: Duration) extends PersistentActor {
  override val persistenceId: String = extractId(self.path.name)

  override def receiveCommand: Receive = active(initialState)

  private def active(balance: State): Receive = {
    case command: AccountCommand => command match {
      case cmd: UpdateBalanceCommand =>
        cmd.validate().fold({ balanceUpdated =>
          persist(balanceUpdated) { persisted =>
            val updatedState = balance.update(persisted)
            sender() ! updatedState
            context.become(active(updatedState))
          }
        },
        processValidationErrors)
      ...
    }
  }
}
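persist writes the event to the journal and runs the callback only after the write has succeeded; updating state and switching behavior inside the callback keeps the actor's in-memory state consistent with what is durably stored.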
case cmd: UpdateGroupBalanceCommand =>
  cmd.validate().fold({ groupBalanceUpdated =>
    persist(Tagged(groupBalanceUpdated, Set("tag1"))) { persisted =>
      sender() ! groupBalanceUpdated
    }
  },
  processValidationErrors)
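Wrapping the payload in Tagged(event, Set("tag1")) stores the tags alongside the event, which is what later makes it visible to the eventsByTag query.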
override def receiveRecover: Receive = {
  var state: State = initialState

  {
    case balanceUpdated: BalanceUpdatedEvent =>
      state = state.update(balanceUpdated)
    case RecoveryCompleted =>
      context.become(active(state))
  }
}
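On (re)start the actor first receives its journaled events through receiveRecover; RecoveryCompleted marks the end of replay, at which point the rebuilt state becomes the active behavior.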
Log data structure
[Diagram: an append-only log; each inserted value (0, 5, 10, 1, ...) is appended at the next index, preserving insertion order.]
Akka Persistence Cassandra
● Purely pull
● Event (log) data
[Diagram: the journal table keyed by (persistence_id, partition_nr): partition (0, 0) holds events 0, 1, 2 ...; partition (0, 1) holds events 100, 101, 102; partition (1, 0) holds events 0, 1, 2.]
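Under the hood the journal is a plain Cassandra table. A simplified sketch of its shape (column set abbreviated; based on the akka-persistence-cassandra schema of the time, so treat it as an approximation):

CREATE TABLE messages (
  persistence_id text,
  partition_nr bigint,
  sequence_nr bigint,
  message blob,
  PRIMARY KEY ((persistence_id, partition_nr), sequence_nr))

Events of one persistence id are split across partitions of a configurable size (target-partition-size later in the configuration), which is why every query carries both persistence_id and partition_nr.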
Akka Persistence Query
● eventsByPersistenceId, allPersistenceIds, eventsByTag
[Diagram: the journal's events (1-4 for persistence_id1) exposed again as an ordered query stream.]
implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal =
  PersistenceQuery(system)
    .readJournalFor[CassandraReadJournal]("cassandra-query-journal")

queries
  .eventsByPersistenceId(persistenceId, 0, Long.MaxValue)
  .runForeach(println)
EventEnvelope(1,persistenceId,1,GroupBalanceUpdatedEvent(9248.0))
EventEnvelope(2,persistenceId,2,BalanceUpdatedEvent(4355.0))
EventEnvelope(3,persistenceId,3,BalanceUpdatedEvent(5245.0))
EventEnvelope(4,persistenceId,4,BalanceUpdatedEvent(4631.0))
EventEnvelope(5,persistenceId,5,BalanceUpdatedEvent(973.0))
...
implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal =
  PersistenceQuery(system)
    .readJournalFor[CassandraReadJournal]("cassandra-query-journal")

queries
  .allPersistenceIds()
  .runForeach(println)
persistenceId5
persistenceId2
persistenceId4
persistenceId1
persistenceId4
...
implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal =
  PersistenceQuery(system)
    .readJournalFor[CassandraReadJournal]("cassandra-query-journal")

queries
  .eventsByTag("tag1", 0)
  .runForeach(println)
implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal =
  PersistenceQuery(system).readJournalFor[CassandraReadJournal]("cassandra-query-journal")

val transform = Flow[EventEnvelope]
  .collect { case EventEnvelope(_, _, _, BalanceUpdatedEvent(value)) => value }
  .scan(new CircularFifoQueue[Double](5)){ (s, d) => s.add(d); s }

val g = RunnableGraph.fromGraph {
  GraphDSL.create() { implicit builder: GraphDSL.Builder[NotUsed] =>
    import akka.stream.scaladsl.GraphDSL.Implicits._

    queries.eventsByPersistenceId(persistenceId, 0, Long.MaxValue) ~> transform ~> kafkaSink

    ClosedShape
  }
}

g.run()
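Note that CircularFifoQueue is mutable: scan emits the same (updated) queue instance for every element, which is fine for this sliding-window example but worth remembering if a downstream stage buffers elements.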
public class AccountEntity extends PersistentEntity<AccountCommand, AccountEvent, State> {
  @Override
  public Behavior initialBehavior(Optional<State> snapshotState) {
    BehaviorBuilder b = newBehaviorBuilder(snapshotState.orElse(initialState));

    b.setCommandHandler(UpdateBalanceCommand.class, (cmd, ctx) -> {
      if (!validate(cmd)) {
        ctx.invalidCommand("...");
        return ctx.done();
      } else {
        return ctx.thenPersist(
          new BalanceUpdatedEvent(cmd.value), () -> ctx.reply(Done.getInstance()));
      }
    });

    b.setEventHandler(BalanceUpdatedEvent.class, evt -> state.update(evt));

    return b.build();
  }
}
public class AccountEventProcessor extends CassandraReadSideProcessor<AccountEvent> {
  AccountEventProcessor state = ...

  @Override
  public AggregateEventTag<AccountEvent> aggregateTag() {
    return Tag1.INSTANCE;
  }

  @Override
  public CompletionStage<Optional<UUID>> prepare(CassandraSession session) {
    return prepareCreateTables(session).thenCompose(a -> ...); // Prepare tables, statements, etc.
  }

  @Override
  public EventHandlers defineEventHandlers(EventHandlersBuilder builder) {
    builder.setEventHandler(AccountEvent.class, this::processAccountEvent);
    return builder.build();
  }

  private CompletionStage<List<BoundStatement>> processAccountEvent(AccountEvent event, UUID offset) {
    BoundStatement bindWriteAnalytics = writeAnalytics.bind();
    bindWriteAnalytics.setString("entity_id", event.id);
    ...
    return completedStatements(Arrays.asList(bindWriteAnalytics));
  }
}
Streaming static data
● Turning database into a stream
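One way to picture this: poll the table page by page and emit the rows as a stream. A minimal Akka Streams sketch; fetchPage and Row are illustrative placeholders, not part of the talk:

import akka.NotUsed
import akka.stream.scaladsl.Source
import scala.concurrent.{ExecutionContext, Future}

final case class Row(offset: Long, payload: Any)   // illustrative row type
def fetchPage(from: Long): Future[Seq[Row]] = ???  // illustrative range query against the log

// The next offset is carried as the unfold state; an empty page retries from the
// same offset (a real implementation would back off instead of spinning).
def logSource(start: Long)(implicit ec: ExecutionContext): Source[Row, NotUsed] =
  Source
    .unfoldAsync(start) { from =>
      fetchPage(from).map {
        case rows if rows.nonEmpty => Some((rows.last.offset + 1, rows))
        case _                     => Some((from, Seq.empty[Row]))
      }
    }
    .mapConcat(_.toList)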
Pulling data from a log
[Diagram sequence: each pull fetches the next values from the log (0, 5, 10, 15, ...) and appends them, in order, to the downstream buffer.]
Actor publisher
private[query] abstract class QueryActorPublisher[MessageType, State: ClassTag](refreshInterval: Option[FiniteDuration])
  extends ActorPublisher[MessageType] {

  protected def initialState: Future[State]
  protected def initialQuery(initialState: State): Future[Action]
  protected def requestNext(state: State, resultSet: ResultSet): Future[Action]
  protected def requestNextFinished(state: State, resultSet: ResultSet): Future[Action]
  protected def updateState(state: State, row: Row): (Option[MessageType], State)
  protected def completionCondition(state: State): Boolean

  private[this] def nextBehavior(...): Receive = {
    if (shouldFetchMore(...)) {
      listenableFutureToFuture(resultSet.fetchMoreResults()).map(FetchedResultSet).pipeTo(self)
      awaiting(resultSet, state, finished)
    } else if (shouldIdle(...)) {
      idle(resultSet, state, finished)
    } else if (shouldComplete(...)) {
      onCompleteThenStop()
      Actor.emptyBehavior
    } else if (shouldRequestMore(...)) {
      if (finished) requestNextFinished(state, resultSet).pipeTo(self)
      else requestNext(state, resultSet).pipeTo(self)
      awaiting(resultSet, state, finished)
    } else {
      idle(resultSet, state, finished)
    }
  }
}
[State-machine diagram: from initialQuery via initialNewResultSet or initialFinished into a loop where shouldFetchMore, shouldIdle, shouldRequestMore and shouldTerminate decide the next step; request, newResultSet, fetchedResultSet, finished and continue drive the transitions; Cancel or SubscriptionTimeout stops the publisher.
Red transitions: deliver buffer and update internal state (progress). Blue transitions: asynchronous database query.]
SELECT * FROM ${tableName} WHERE
persistence_id = ? AND
partition_nr = ? AND
sequence_nr >= ? AND
sequence_nr <= ?
Events by persistence id
[Diagram sequence: the query walks partition (0, 0) row by row (events 0, 1, 2, ...), then rolls over to partition (0, 1) for events 100, 101, 102, always in sequence-number order.]
private[query] class EventsByPersistenceIdPublisher(...)
  extends QueryActorPublisher[PersistentRepr, EventsByPersistenceIdState](...) {

  override protected def initialState: Future[EventsByPersistenceIdState] = {
    ...
    EventsByPersistenceIdState(initialFromSequenceNr, 0, currentPnr)
  }

  override protected def updateState(
      state: EventsByPersistenceIdState,
      row: Row): (Option[PersistentRepr], EventsByPersistenceIdState) = {
    val event = extractEvent(row)
    val partitionNr = row.getLong("partition_nr") + 1

    (Some(event),
      EventsByPersistenceIdState(event.sequenceNr + 1, state.count + 1, partitionNr))
  }
}
All persistence ids
SELECT DISTINCT persistence_id, partition_nr FROM $tableName
[Diagram: the query scans the distinct (persistence_id, partition_nr) pairs, here (0, 0), (0, 1) and (1, 0), across the journal.]
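Cassandra only allows SELECT DISTINCT on partition-key columns, hence the query returns (persistence_id, partition_nr) pairs and the publisher below de-duplicates ids that span several partitions via its knownPersistenceIds set.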
private[query] class AllPersistenceIdsPublisher(...)
  extends QueryActorPublisher[String, AllPersistenceIdsState](...) {

  override protected def initialState: Future[AllPersistenceIdsState] =
    Future.successful(AllPersistenceIdsState(Set.empty))

  override protected def updateState(
      state: AllPersistenceIdsState, row: Row): (Option[String], AllPersistenceIdsState) = {
    val event = row.getString("persistence_id")

    if (state.knownPersistenceIds.contains(event)) {
      (None, state)
    } else {
      (Some(event), state.copy(knownPersistenceIds = state.knownPersistenceIds + event))
    }
  }
}
Events by tag
[Diagram sequence: scanning the journal partitions for events tagged "tag 1": events 1, 2 and 100 of persistence id 0 and event 2 of persistence id 1 carry the tag, while untagged events (event 0, event 101, event 102) are skipped as the query merges tagged events across persistence ids.]
Events by tag
[Diagram: a separate events-by-tag view bucketed by day: buckets "tag 1 1/1/2016" and "tag 1 1/2/2016" hold (Id 0, event 1), (Id 1, event 2), (Id 0, event 2) and (Id 0, event 100) in timestamp order, alongside the journal partitions.]
SELECT * FROM $eventsByTagViewName$tagId WHERE
tag$tagId = ? AND
timebucket = ? AND
timestamp > ? AND
timestamp <= ?
ORDER BY timestamp ASC
LIMIT ?
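Bucketing the view by time (one bucket per day here) keeps partitions bounded and lets the query scan bucket after bucket with the timestamp-ordered statement above.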
[Diagram (repeated with different levels highlighted): the hierarchy of consistency models: Strong Serializable at the top; Linearizable and Serializable below; then Sequential, RR and SI; Causal with WFR, MR, MW, RYW and PRAM; down through CS, MAW, RC and P-CI to Eventual Consistency (EC).]
[Diagram sequence: the events-by-tag query tracks a per-persistence-id sequence number (persistence_id -> seq); each delivered event advances the expected seq, and an unexpected seq ("?") indicates a missing event that must arrive before later ones may be emitted.]
def replay(): Unit = {
  val backtracking = isBacktracking
  val limit =
    if (backtracking) maxBufferSize
    else maxBufferSize - buf.size

  val toOffs =
    if (backtracking && abortDeadline.isEmpty) highestOffset
    else UUIDs.endOf(System.currentTimeMillis() - eventualConsistencyDelayMillis)

  context.actorOf(EventsByTagFetcher.props(tag, currTimeBucket, currOffset, toOffs, limit, backtracking,
    self, session, preparedSelect, seqNumbers, settings))

  context.become(replaying(limit))
}

def replaying(limit: Int): Receive = {
  case env @ UUIDPersistentRepr(offs, _) => // Deliver buffer
  case ReplayDone(count, seqN, highest) => // Request more
  case ReplayAborted(seqN, pid, expectedSeqNr, gotSeqNr) =>
    // Causality violation, wait and retry. Only applicable if all events for persistence_id are tagged
  case ReplayFailed(cause) => // Failure
  case _: Request => // Deliver buffer
  case Continue => // Do nothing
  case Cancel => // Stop
}
Akka Persistence Cassandra Replay

def asyncReplayMessages(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long, max: Long)
    (replayCallback: (PersistentRepr) => Unit): Future[Unit] = Future {
  new MessageIterator(persistenceId, fromSequenceNr, toSequenceNr, max).foreach(msg => {
    replayCallback(msg)
  })
}

class MessageIterator(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long, max: Long)
    extends Iterator[PersistentRepr] {

  private val initialFromSequenceNr = math.max(highestDeletedSequenceNumber(persistenceId) + 1, fromSequenceNr)
  private val iter = new RowIterator(persistenceId, initialFromSequenceNr, toSequenceNr)
  private var mcnt = 0L

  private var c: PersistentRepr = null
  private var n: PersistentRepr = PersistentRepr(Undefined)

  fetch()

  def hasNext: Boolean = ...
  def next(): PersistentRepr = ...
}
class RowIterator(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long) extends Iterator[Row] {
  var currentPnr = partitionNr(fromSequenceNr)
  var currentSnr = fromSequenceNr

  var fromSnr = fromSequenceNr
  var toSnr = toSequenceNr

  var iter = newIter()

  def newIter() =
    session.execute(preparedSelectMessages.bind(persistenceId, currentPnr, fromSnr, toSnr)).iterator

  @annotation.tailrec
  final def hasNext: Boolean =
    if (iter.hasNext) true
    else if (!inUse) false
    else {
      currentPnr += 1
      fromSnr = currentSnr
      iter = newIter()
      hasNext
    }

  def next(): Row = {
    val row = iter.next()
    currentSnr = row.getLong("sequence_nr")
    row
  }
}
Non blocking asynchronous replay
private[this] val queries: CassandraReadJournal =
  new CassandraReadJournal(
    extendedActorSystem,
    context.system.settings.config.getConfig("cassandra-query-journal"))

override def asyncReplayMessages(
    persistenceId: String,
    fromSequenceNr: Long,
    toSequenceNr: Long,
    max: Long)(replayCallback: (PersistentRepr) => Unit): Future[Unit] =
  queries
    .eventsByPersistenceId(
      persistenceId,
      fromSequenceNr,
      toSequenceNr,
      max,
      replayMaxResultSize,
      None,
      "asyncReplayMessages")
    .runForeach(replayCallback)
    .map(_ => ())
Benchmarks
[Charts: replay benchmarks, time (ms) against the number of threads and actors, comparing the blocking and the asynchronous replay implementations under strong scaling (REPLAY STRONG SCALING) and weak scaling (WEAK SCALING).]
my-dispatcher {
  type = "Dispatcher"
  executor = "thread-pool-executor"
  thread-pool-executor {
    fixed-pool-size = $fixedPoolSize
  }
  throughput = $throughput
}

my-dispatcher {
  type = "Dispatcher"
  executor = "fork-join-executor"
  fork-join-executor {
    parallelism-min = $parallelismMin
    parallelism-max = $parallelismMax
    parallelism-factor = $parallelismFactor
  }
  throughput = $throughput
}
cassandra-journal {
  plugin-dispatcher = $pluginDispatcher
  replay-dispatcher = $replayDispatcher
  max-result-size = $resultSize
  max-result-size-replay = $resultSizeReplay
  target-partition-size = $partitionSize
}

cassandra-query-journal {
  plugin-dispatcher = $queryPluginDispatcher
  max-buffer-size = $bufferSize
  max-result-size-query = $resultSizeReplay
}
Alternative architecture
[Diagram: per-node logs keyed by node_id: nodes 0 and 1 each append their own events (persistence_id 0 events 0-3, persistence_id 1 event 0, persistence_id 2 event 0) in arrival order, while separate indexes track events by tag ("tag 1"), by persistence id and the set of all ids.]
val boundStatements = statementGroup(eventsByPersistenceId, eventsByTag, allPersistenceIds)

Future.sequence(boundStatements).flatMap { stmts =>
  val batch = new BatchStatement().setConsistencyLevel(...).setRetryPolicy(...)
  stmts.foreach(batch.add)

  session.underlying().flatMap(_.executeAsync(batch))
}
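Writing the journal row and the derived views in one logged batch makes the writes atomic (eventually all are applied, or none), at the cost of the batch-log overhead; it does not make them isolated, so a reader may briefly observe only some of the writes.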
val eventsByPersistenceIdStatement = statementGroup(eventsByPersistenceIdStatement)
val boundStatements = statementGroup(eventsByTagStatement, allPersistenceIdsStatement)
...

session.underlying().flatMap { s =>
  val ebpResult = s.executeAsync(eventsByPersistenceIdStatement)
  val batchResult = s.executeAsync(batch)
  ...
}
Event time processing
● Ingestion time, processing time, event time
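A minimal sketch of the three notions of time; the names are illustrative, not from the talk:

// event time      - when the event happened at its source
// ingestion time  - when the system first saw the event
// processing time - when an operator actually handles it
final case class Timestamped[A](value: A, eventTime: Long, ingestionTime: Long)

def ingest[A](value: A, eventTime: Long): Timestamped[A] =
  Timestamped(value, eventTime, ingestionTime = System.currentTimeMillis())

def process[A](e: Timestamped[A]): Unit = {
  val processingTime = System.currentTimeMillis() // generally differs from both timestamps above
  // ...
}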
Ordering
[Diagram: three keyed events with event times 12:34:56 to 12:34:58: arrival order (key 1, key 2, key 0) differs from event-time order (key 0, key 1, key 2).]
Distributed causal stream merging
[Diagram sequence: events from two nodes (Id 0 events 0-3, Id 1 event 0, Id 2 event 0) are merged into a single causally ordered stream; a durable persistence_id -> seq table (e.g. 0 -> 2, 1 -> 0, 2 -> 0) records how far each persistence id has been merged, so per-entity order is preserved across nodes.]
Replay
[Diagram sequence: on replay the merged stream is rebuilt from the per-node logs; the persistence_id -> seq table (0 -> 2) together with a stream_id -> seq table (0 -> 1, 1 -> 2) records the replay progress of every source stream.]
Exactly once delivery
[Diagram sequence: the merged stream is delivered downstream event by event; each delivery is acknowledged (ACK), and unacknowledged events (here Id 0, event 2) are redelivered after a failure.]
Exactly once delivery
● Durable offset
[Diagram: a durable offset into the stream (positions 0-4) marks the last processed event; after a restart, processing resumes from the stored offset rather than from the beginning.]
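A minimal sketch of resuming from a durable offset, reusing the read journal and implicits from the earlier snippets; readOffset and update are illustrative placeholders, and exactly-once effects require update to write the projection change and the event's offset atomically:

import akka.Done
import akka.stream.scaladsl.Sink
import scala.concurrent.Future

def readOffset(): Future[Long] = ???                // illustrative: load the last committed offset
def update(env: EventEnvelope): Future[Done] = ???  // illustrative: apply the event and store its offset together

// Resume the tagged stream from the stored offset; after a crash at most the
// uncommitted tail is replayed, and the stored offset makes redelivery detectable.
readOffset().map { offset =>
  queries
    .eventsByTag("tag1", offset)
    .mapAsync(1)(update)
    .runWith(Sink.ignore)
}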
val conf = new SparkConf().setAppName("...").setMaster("...").set("spark.cassandra.connection.host", "...")
val sc = new SparkContext(conf)

implicit val ordering = new Ordering[(String, Double)] {
  override def compare(x: (String, Double), y: (String, Double)): Int =
    implicitly[Ordering[Double]].compare(x._2, y._2)
}

sc.eventTable()
  .cache()
  .flatMap {
    case (JournalKey(persistenceId, _, _), BalanceUpdatedEvent(change)) =>
      (persistenceId -> change) :: Nil
    case _ => Nil
  }
  .reduceByKey(_ + _)
  .top(100)
  .foreach(println)

sc.stop()
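eventTable() comes from akka-analytics-cassandra and exposes the whole journal as an RDD of (JournalKey, event) pairs, JournalKey carrying the persistence id, partition and sequence number, so the event log can be processed like any other Spark dataset.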
Akka Analytics
val conf = new SparkConf().setAppName("...").setMaster("...").set("spark.cassandra.connection.host", "...")
val sc = new StreamingContext(conf, Seconds(5))

implicit val ordering = new Ordering[(String, Double)] {
  override def compare(x: (String, Double), y: (String, Double)): Int =
    implicitly[Ordering[Double]].compare(x._2, y._2)
}

sc.eventTable()
  .cache()
  .flatMap {
    case (JournalKey(persistenceId, _, _), BalanceUpdatedEvent(change)) =>
      (persistenceId -> change) :: Nil
    case _ => Nil
  }
  .reduceByKey(_ + _)
  .top(100)
  .foreach(println)

sc.stop()
[Diagrams: the wider picture: internet, services, devices and social feeds flowing through Kafka into stream-processing apps, stream consumers, search, apps, services, databases and batch jobs (with serialisation and distributed systems as cross-cutting concerns); a microservice landscape (user, mobile, system) mixing CQRS/ES, relational and NoSQL stores serving clients with updates; and a parameter-server training topology where model devices exchange parameter updates (ΔP) with parameter devices over input data.]
Challenges
● All the solved problems
○ Exactly once delivery
○ Consistency
○ Availability
○ Fault tolerance
○ Cross service invariants and consistency
○ Transactions
○ Automated deployment and configuration management
○ Serialization, versioning, compatibility
○ Automated elasticity
○ No downtime version upgrades
○ Graceful shutdown of nodes
○ Distributed system verification, logging, tracing, monitoring, debugging
○ Split brains
○ ...
Conclusion
● From request-response, synchronous calls and mutable state
● To streams and asynchronous messaging
● Production-ready distributed systems
Questions
MANCHESTER LONDON NEW YORK
MANCHESTER LONDON NEW YORK
@zapletal_martin @cakesolutions
347 708 1518
enquiries@cakesolutions.net
We are hiring
http://www.cakesolutions.net/careers

More Related Content

What's hot (20)

PPTX
Rxjs swetugg
Christoffer Noring
 
PDF
Rxjs vienna
Christoffer Noring
 
PDF
Sustaining Test-Driven Development
AgileOnTheBeach
 
PDF
The Ring programming language version 1.4 book - Part 17 of 30
Mahmoud Samir Fayed
 
PPTX
Rxjs ngvikings
Christoffer Noring
 
PPTX
Unit test candidate solutions
benewu
 
PPTX
A Test of Strength
Chris Oldwood
 
PPTX
Big Data Day LA 2016/ Hadoop/ Spark/ Kafka track - Iterative Spark Developmen...
Data Con LA
 
PDF
Introduction to Mongodb execution plan and optimizer
Mydbops
 
PPT
Dynamically Evolving Systems: Cluster Analysis Using Time
Magnify Analytic Solutions
 
PDF
The Ring programming language version 1.5.2 book - Part 67 of 181
Mahmoud Samir Fayed
 
PPTX
Using Change Streams to Keep Up with Your Data
MongoDB
 
PDF
React table tutorial project setup, use table, and usefilter
Katy Slemon
 
PPT
Ken 20150306 ćżƒćŸ—ćˆ†äș«
LearningTech
 
PDF
How to implement g rpc services in nodejs
Katy Slemon
 
PDF
ITB2019 10 in 50: Ten Coldbox Modules You Should be Using in Every App - Jon ...
Ortus Solutions, Corp
 
PDF
Advanced Akka For Architects
Lightbend
 
KEY
SOLID Principles
Chris Weldon
 
PPTX
Angular2 rxjs
Christoffer Noring
 
PPTX
Rxjs ppt
Christoffer Noring
 
Rxjs swetugg
Christoffer Noring
 
Rxjs vienna
Christoffer Noring
 
Sustaining Test-Driven Development
AgileOnTheBeach
 
The Ring programming language version 1.4 book - Part 17 of 30
Mahmoud Samir Fayed
 
Rxjs ngvikings
Christoffer Noring
 
Unit test candidate solutions
benewu
 
A Test of Strength
Chris Oldwood
 
Big Data Day LA 2016/ Hadoop/ Spark/ Kafka track - Iterative Spark Developmen...
Data Con LA
 
Introduction to Mongodb execution plan and optimizer
Mydbops
 
Dynamically Evolving Systems: Cluster Analysis Using Time
Magnify Analytic Solutions
 
The Ring programming language version 1.5.2 book - Part 67 of 181
Mahmoud Samir Fayed
 
Using Change Streams to Keep Up with Your Data
MongoDB
 
React table tutorial project setup, use table, and usefilter
Katy Slemon
 
Ken 20150306 ćżƒćŸ—ćˆ†äș«
LearningTech
 
How to implement g rpc services in nodejs
Katy Slemon
 
ITB2019 10 in 50: Ten Coldbox Modules You Should be Using in Every App - Jon ...
Ortus Solutions, Corp
 
Advanced Akka For Architects
Lightbend
 
SOLID Principles
Chris Weldon
 
Angular2 rxjs
Christoffer Noring
 
Rxjs ppt
Christoffer Noring
 

Viewers also liked (10)

PDF
Machine learning at Scale with Apache Spark
Martin Zapletal
 
PDF
Large volume data analysis on the Typesafe Reactive Platform
Martin Zapletal
 
PDF
Apache spark - Installation
Martin Zapletal
 
PDF
Large volume data analysis on the Typesafe Reactive Platform - Big Data Scala...
Martin Zapletal
 
PDF
Spark Based Distributed Deep Learning Framework For Big Data Applications
Humoyun Ahmedov
 
PPTX
Apache spark - History and market overview
Martin Zapletal
 
KEY
Curator intro
Jordan Zimmerman
 
PDF
Cassandra as an event sourced journal for big data analytics Cassandra Summit...
Martin Zapletal
 
PDF
Apache spark - Spark's distributed programming model
Martin Zapletal
 
PDF
codecentric AG: CQRS and Event Sourcing Applications with Cassandra
DataStax Academy
 
Machine learning at Scale with Apache Spark
Martin Zapletal
 
Large volume data analysis on the Typesafe Reactive Platform
Martin Zapletal
 
Apache spark - Installation
Martin Zapletal
 
Large volume data analysis on the Typesafe Reactive Platform - Big Data Scala...
Martin Zapletal
 
Spark Based Distributed Deep Learning Framework For Big Data Applications
Humoyun Ahmedov
 
Apache spark - History and market overview
Martin Zapletal
 
Curator intro
Jordan Zimmerman
 
Cassandra as an event sourced journal for big data analytics Cassandra Summit...
Martin Zapletal
 
Apache spark - Spark's distributed programming model
Martin Zapletal
 
codecentric AG: CQRS and Event Sourcing Applications with Cassandra
DataStax Academy
 
Ad

Similar to Data in Motion: Streaming Static Data Efficiently 2 (20)

PDF
Resilient Applications with Akka Persistence - Scaladays 2014
Björn Antonsson
 
PDF
Akka persistence webinar
patriknw
 
PDF
Akka persistence == event sourcing in 30 minutes
Konrad Malawski
 
PDF
DDDing Tools = Akka Persistence
Konrad Malawski
 
PPTX
Actors, akka, streams
Tomer Ben David
 
PDF
Using Akka Persistence to build a configuration datastore
Anargyros Kiourkos
 
PDF
Cake Solutions: Cassandra as event sourced journal for big data analytics
DataStax Academy
 
PDF
Cassandra as event sourced journal for big data analytics
Anirvan Chakraborty
 
PDF
HBase RowKey design for Akka Persistence
Konrad Malawski
 
PDF
Event-sourced architectures with Akka
Sander Mak (@Sander_Mak)
 
PDF
Event-sourced architectures with Akka - Sander Mak
NLJUG
 
PDF
aming distribution: formal protocols for Akka Typed
J On The Beach
 
PDF
Taming Distribution: Formal Protocols for Akka Typed
Roland Kuhn
 
PDF
Event Sourcing using Akka on AWS
Daniel Pfeiffer
 
PDF
Avoiding the Pit of Despair - Event Sourcing with Akka and Cassandra
Luke Tillman
 
PDF
Akka with Scala
Oto Brglez
 
PPTX
CQRS + ES with Scala and Akka
Bharadwaj N
 
PDF
Event Sourcing on AWS Using Akka in Java
Daniel Pfeiffer
 
PDF
Event Sourcing - what could possibly go wrong?
Andrzej Ludwikowski
 
PDF
Andrzej Ludwikowski - Event Sourcing - what could possibly go wrong? - Codemo...
Codemotion
 
Resilient Applications with Akka Persistence - Scaladays 2014
Björn Antonsson
 
Akka persistence webinar
patriknw
 
Akka persistence == event sourcing in 30 minutes
Konrad Malawski
 
DDDing Tools = Akka Persistence
Konrad Malawski
 
Actors, akka, streams
Tomer Ben David
 
Using Akka Persistence to build a configuration datastore
Anargyros Kiourkos
 
Cake Solutions: Cassandra as event sourced journal for big data analytics
DataStax Academy
 
Cassandra as event sourced journal for big data analytics
Anirvan Chakraborty
 
HBase RowKey design for Akka Persistence
Konrad Malawski
 
Event-sourced architectures with Akka
Sander Mak (@Sander_Mak)
 
Event-sourced architectures with Akka - Sander Mak
NLJUG
 
aming distribution: formal protocols for Akka Typed
J On The Beach
 
Taming Distribution: Formal Protocols for Akka Typed
Roland Kuhn
 
Event Sourcing using Akka on AWS
Daniel Pfeiffer
 
Avoiding the Pit of Despair - Event Sourcing with Akka and Cassandra
Luke Tillman
 
Akka with Scala
Oto Brglez
 
CQRS + ES with Scala and Akka
Bharadwaj N
 
Event Sourcing on AWS Using Akka in Java
Daniel Pfeiffer
 
Event Sourcing - what could possibly go wrong?
Andrzej Ludwikowski
 
Andrzej Ludwikowski - Event Sourcing - what could possibly go wrong? - Codemo...
Codemotion
 
Ad

Recently uploaded (20)

PPTX
Fundamentals_of_Microservices_Architecture.pptx
MuhammadUzair504018
 
PPTX
Tally software_Introduction_Presentation
AditiBansal54083
 
PDF
Automate Cybersecurity Tasks with Python
VICTOR MAESTRE RAMIREZ
 
PPTX
3uTools Full Crack Free Version Download [Latest] 2025
muhammadgurbazkhan
 
PDF
Capcut Pro Crack For PC Latest Version {Fully Unlocked} 2025
hashhshs786
 
PDF
Alexander Marshalov - How to use AI Assistants with your Monitoring system Q2...
VictoriaMetrics
 
PPTX
Revolutionizing Code Modernization with AI
KrzysztofKkol1
 
PPT
MergeSortfbsjbjsfk sdfik k
RafishaikIT02044
 
PPTX
How Apagen Empowered an EPC Company with Engineering ERP Software
SatishKumar2651
 
PPTX
MailsDaddy Outlook OST to PST converter.pptx
abhishekdutt366
 
PDF
Unlock Efficiency with Insurance Policy Administration Systems
Insurance Tech Services
 
PPTX
Migrating Millions of Users with Debezium, Apache Kafka, and an Acyclic Synch...
MD Sayem Ahmed
 
PDF
vMix Pro 28.0.0.42 Download vMix Registration key Bundle
kulindacore
 
PPTX
Human Resources Information System (HRIS)
Amity University, Patna
 
PDF
Digger Solo: Semantic search and maps for your local files
seanpedersen96
 
DOCX
Import Data Form Excel to Tally Services
Tally xperts
 
PDF
Revenue streams of the Wazirx clone script.pdf
aaronjeffray
 
PDF
Powering GIS with FME and VertiGIS - Peak of Data & AI 2025
Safe Software
 
PDF
Understanding the Need for Systemic Change in Open Source Through Intersectio...
Imma Valls Bernaus
 
PDF
GetOnCRM Speeds Up Agentforce 3 Deployment for Enterprise AI Wins.pdf
GetOnCRM Solutions
 
Fundamentals_of_Microservices_Architecture.pptx
MuhammadUzair504018
 
Tally software_Introduction_Presentation
AditiBansal54083
 
Automate Cybersecurity Tasks with Python
VICTOR MAESTRE RAMIREZ
 
3uTools Full Crack Free Version Download [Latest] 2025
muhammadgurbazkhan
 
Capcut Pro Crack For PC Latest Version {Fully Unlocked} 2025
hashhshs786
 
Alexander Marshalov - How to use AI Assistants with your Monitoring system Q2...
VictoriaMetrics
 
Revolutionizing Code Modernization with AI
KrzysztofKkol1
 
MergeSortfbsjbjsfk sdfik k
RafishaikIT02044
 
How Apagen Empowered an EPC Company with Engineering ERP Software
SatishKumar2651
 
MailsDaddy Outlook OST to PST converter.pptx
abhishekdutt366
 
Unlock Efficiency with Insurance Policy Administration Systems
Insurance Tech Services
 
Migrating Millions of Users with Debezium, Apache Kafka, and an Acyclic Synch...
MD Sayem Ahmed
 
vMix Pro 28.0.0.42 Download vMix Registration key Bundle
kulindacore
 
Human Resources Information System (HRIS)
Amity University, Patna
 
Digger Solo: Semantic search and maps for your local files
seanpedersen96
 
Import Data Form Excel to Tally Services
Tally xperts
 
Revenue streams of the Wazirx clone script.pdf
aaronjeffray
 
Powering GIS with FME and VertiGIS - Peak of Data & AI 2025
Safe Software
 
Understanding the Need for Systemic Change in Open Source Through Intersectio...
Imma Valls Bernaus
 
GetOnCRM Speeds Up Agentforce 3 Deployment for Enterprise AI Wins.pdf
GetOnCRM Solutions
 

Data in Motion: Streaming Static Data Efficiently 2

  • 2. Martin Zapletal @zapletal_martin #ScalaDays Data in Motion: Streaming Static Data Efficiently in Akka Persistence (and elsewhere) @cakesolutions
  • 3. Data at scale ● Reactive ● Real time, asynchronous and message driven ● Elastic and scalable ● Resilient and fault tolerant
  • 5. persistence_id1, event 2 persistence_id1, event 3 persistence_id1, event 4 persistence_id1, event 1 2 35 Akka Persistence 1 4
  • 6. class AccountActor(protected[this] val passivationTimeout: Duration) extends PersistentActor { override val persistenceId: String = extractId(self.path.name) override def receiveCommand: Receive = active(initialState) private def active( balance: State ): Receive = { case command: AccountCommand => command match { case cmd: UpdateBalanceCommand => cmd.validate().fold({ balanceUpdated => persist(balanceUpdated) { persisted => val updatedState = balance.update(persisted) sender() ! updatedState context.become(active(updatedState)) } }, processValidationErrors) ... } } }
  • 7. class AccountActor(protected[this] val passivationTimeout: Duration) extends PersistentActor { override val persistenceId: String = extractId(self.path.name) override def receiveCommand: Receive = active(initialState) private def active( balance: State ): Receive = { case command: AccountCommand => command match { case cmd: UpdateBalanceCommand => cmd.validate().fold({ balanceUpdated => persist(balanceUpdated) { persisted => val updatedState = balance.update(persisted) sender() ! updatedState context.become(active(updatedState)) } }, processValidationErrors) ... } } }
  • 8. class AccountActor(protected[this] val passivationTimeout: Duration) extends PersistentActor { override val persistenceId: String = extractId(self.path.name) override def receiveCommand: Receive = active(initialState) private def active( balance: State ): Receive = { case command: AccountCommand => command match { case cmd: UpdateBalanceCommand => cmd.validate().fold({ balanceUpdated => persist(balanceUpdated) { persisted => val updatedState = balance.update(persisted) sender() ! updatedState context.become(active(updatedState)) } }, processValidationErrors) ... } } }
  • 9. class AccountActor(protected[this] val passivationTimeout: Duration) extends PersistentActor { override val persistenceId: String = extractId(self.path.name) override def receiveCommand: Receive = active(initialState) private def active( balance: State ): Receive = { case command: AccountCommand => command match { case cmd: UpdateBalanceCommand => cmd.validate().fold({ balanceUpdated => persist(balanceUpdated) { persisted => val updatedState = balance.update(persisted) sender() ! updatedState context.become(active(updatedState)) } }, processValidationErrors) ... } } }
  • 10. class AccountActor(protected[this] val passivationTimeout: Duration) extends PersistentActor { override val persistenceId: String = extractId(self.path.name) override def receiveCommand: Receive = active(initialState) private def active( balance: State ): Receive = { case command: AccountCommand => command match { case cmd: UpdateBalanceCommand => cmd.validate().fold({ balanceUpdated => persist(balanceUpdated) { persisted => val updatedState = balance.update(persisted) sender() ! updatedState context.become(active(updatedState)) } }, processValidationErrors) ... } } }
  • 11. case cmd: UpdateGroupBalanceCommand => cmd.validate().fold({ groupBalanceUpdated => persist(Tagged(groupBalanceUpdated, Set("tag1"))) { persisted => sender() ! groupBalanceUpdated } }, processValidationErrors)
  • 12. case cmd: UpdateGroupBalanceCommand => cmd.validate().fold({ groupBalanceUpdated => persist(Tagged(groupBalanceUpdated, Set("tag1"))) { persisted => sender() ! groupBalanceUpdated } }, processValidationErrors)
  • 13. override def receiveRecover: Receive = { var state: State = initialState { case balanceUpdated: BalanceUpdatedEvent => state = state.update(balanceUpdated) case RecoveryCompleted => context.become(active(state)) } }
  • 14. override def receiveRecover: Receive = { var state: State = initialState { case balanceUpdated: BalanceUpdatedEvent => state = state.update(balanceUpdated) case RecoveryCompleted => context.become(active(state)) } }
  • 15. 0 1 2 3 4 0 5 10 1 5 Inserted value 0 Inserted value 5 Inserted value 10 Inserted value 1 Inserted value 55 Log data structure
  • 16. Persistence_ id partition_nr 0 0 0 1 event 1 event 100 event 101 event 102 event 0 event 2 1 0 event 0 event 1 event 2 Akka Persistence Cassandra ● Purely pull ● Event (log) data
  • 17. Akka Persistence Query ● eventsByPersistenceId, allPersistenceIds, eventsByTag 1 4 2 35 persistence_id1, event 2 persistence_id1, event 3 persistence_id1, event 4 persistence_id1, event 1
  • 18. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system) .readJournalFor[CassandraReadJournal]("cassandra-query-journal") queries .eventsByPersistenceId(persistenceId, 0, Long.MaxValue) .runForeach(println)
  • 19. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system) .readJournalFor[CassandraReadJournal]("cassandra-query-journal") queries .eventsByPersistenceId(persistenceId, 0, Long.MaxValue) .runForeach(println)
  • 20. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system) .readJournalFor[CassandraReadJournal]("cassandra-query-journal") queries .eventsByPersistenceId(persistenceId, 0, Long.MaxValue) .runForeach(println)
  • 22. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system) .readJournalFor[CassandraReadJournal]("cassandra-query-journal") queries .allPersistenceIds() .runForeach(println)
  • 23. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system) .readJournalFor[CassandraReadJournal]("cassandra-query-journal") queries .allPersistenceIds() .runForeach(println)
  • 25. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system) .readJournalFor[CassandraReadJournal]("cassandra-query-journal") queries .eventsByTag("tag1", 0) .runForeach(println)
  • 26. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system) .readJournalFor[CassandraReadJournal]("cassandra-query-journal") queries .eventsByTag("tag1", 0) .runForeach(println)
  • 27. implicit val system: ActorSystem = ... implicit val materializer: Materializer = ... lazy val queries: CassandraReadJournal = PersistenceQuery(system).readJournalFor[CassandraReadJournal]("cassandra-query-journal") val transform = Flow[EventEnvelope] .collect { case EventEnvelope(_, _, _, BalanceUpdatedEvent(value)) => value } .scan(new CircularFifoQueue[Double](5)){ (s, d) => s.add(d); s } val g = RunnableGraph.fromGraph { GraphDSL.create() { implicit builder: GraphDSL.Builder[NotUsed] => import akka.stream.scaladsl.GraphDSL.Implicits._ queries.eventsByPersistenceId(persistenceId, 0, Long.MaxValue) ~> transform ~> kafkaSink ClosedShape } } g.run()
• 30. public class AccountEntity extends PersistentEntity<AccountCommand, AccountEvent, State> { @Override public Behavior initialBehavior(Optional<State> snapshotState) { BehaviorBuilder b = newBehaviorBuilder(snapshotState.orElse(initialState)); b.setCommandHandler(UpdateBalanceCommand.class, (cmd, ctx) -> { if (!validate(cmd)) { ctx.invalidCommand("..."); return ctx.done(); } else { return ctx.thenPersist( new BalanceUpdatedEvent(cmd.value), () -> ctx.reply(Done.getInstance())); } }); b.setEventHandler(BalanceUpdatedEvent.class, evt -> state.update(evt)); return b.build(); } }
• 33. public class AccountEventProcessor extends CassandraReadSideProcessor<AccountEvent> { AccountEventProcessor state = ... @Override public AggregateEventTag<AccountEvent> aggregateTag() { return Tag1.INSTANCE; } @Override public CompletionStage<Optional<UUID>> prepare(CassandraSession session) { return prepareCreateTables(session).thenCompose(a -> /* prepare tables, statements, etc. */ ...); } @Override public EventHandlers defineEventHandlers(EventHandlersBuilder builder) { builder.setEventHandler(AccountEvent.class, this::processAccountEvent); return builder.build(); } private CompletionStage<List<BoundStatement>> processAccountEvent(AccountEvent event, UUID offset) { BoundStatement bindWriteAnalytics = writeAnalytics.bind(); bindWriteAnalytics.setString("entity_id", event.id); ... return completedStatements(Arrays.asList(bindWriteAnalytics)); } }
• 36. Streaming static data ● Turning a database into a stream
• 37, 40-42. Pulling data from a log [Diagram sequence: a consumer repeatedly polls the log, first reading the existing values (0, 5, 10), then picking up a newly appended value (15) on a later pass]
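The pulling behaviour in the diagrams can be sketched as a poller that remembers the last offset it saw between passes; the real implementation is the QueryActorPublisher on the next slide, which additionally handles demand and refresh intervals (names and signatures here are assumptions):

import scala.concurrent.{ExecutionContext, Future}

// Repeatedly ask the store for entries after the last seen offset,
// emit them, and continue from the new high-water mark.
def pollLog[A](readFrom: Long => Future[Vector[(Long, A)]])
              (emit: A => Unit, from: Long = 0L)
              (implicit ec: ExecutionContext): Future[Nothing] =
  readFrom(from).flatMap { batch =>
    batch.foreach { case (_, value) => emit(value) }
    val next = batch.lastOption.map(_._1 + 1L).getOrElse(from)
    pollLog(readFrom)(emit, next) // in practice, delayed by a refresh interval
  }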
  • 43. Actor publisher private[query] abstract class QueryActorPublisher[MessageType, State: ClassTag](refreshInterval: Option[FiniteDuration]) extends ActorPublisher[MessageType] { protected def initialState: Future[State] protected def initialQuery(initialState: State): Future[Action] protected def requestNext(state: State, resultSet: ResultSet): Future[Action] protected def requestNextFinished(state: State, resultSet: ResultSet): Future[Action] protected def updateState(state: State, row: Row): (Option[MessageType], State) protected def completionCondition(state: State): Boolean private[this] def nextBehavior(...): Receive = { if (shouldFetchMore(...)) { listenableFutureToFuture(resultSet.fetchMoreResults()).map(FetchedResultSet).pipeTo(self) awaiting(resultSet, state, finished) } else if (shouldIdle(...)) { idle(resultSet, state, finished) } else if (shouldComplete(...)) { onCompleteThenStop() Actor.emptyBehavior } else if (shouldRequestMore(...)) { if (finished) requestNextFinished(state, resultSet).pipeTo(self) else requestNext(state, resultSet).pipeTo(self) awaiting(resultSet, state, finished) } else { idle(resultSet, state, finished) } } }
• 47. Events by persistence id SELECT * FROM ${tableName} WHERE persistence_id = ? AND partition_nr = ? AND sequence_nr >= ? AND sequence_nr <= ? [Table: partition (0, 0) holds events 0-100, partition (0, 1) holds events 101 and 102]
• 48-53. [Diagram sequence: the query reads partition (0, 0) event by event (event 0, 1, 2, ..., 100), then rolls over to partition (0, 1) for events 101 and 102]
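The rollover from partition (0, 0) to (0, 1) works because the partition number is derivable from the sequence number. A sketch assuming a fixed target partition size of 100, matching the diagram (the plugin exposes this as target-partition-size; the exact formula here is an assumption):

// Sequence numbers 1..100 land in partition 0, 101..200 in partition 1, etc.
def partitionNr(sequenceNr: Long, targetPartitionSize: Long = 100L): Long =
  (sequenceNr - 1L) / targetPartitionSize

assert(partitionNr(100L) == 0L)
assert(partitionNr(101L) == 1L)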
• 54. private[query] class EventsByPersistenceIdPublisher(...) extends QueryActorPublisher[PersistentRepr, EventsByPersistenceIdState](...) { override protected def initialState: Future[EventsByPersistenceIdState] = { ... EventsByPersistenceIdState(initialFromSequenceNr, 0, currentPnr) } override protected def updateState( state: EventsByPersistenceIdState, row: Row): (Option[PersistentRepr], EventsByPersistenceIdState) = { val event = extractEvent(row) val partitionNr = row.getLong("partition_nr") + 1 (Some(event), EventsByPersistenceIdState(event.sequenceNr + 1, state.count + 1, partitionNr)) } }
• 56. All persistence ids SELECT DISTINCT persistence_id, partition_nr FROM $tableName [Table: persistence id 0 with partitions 0 and 1, persistence id 1 with partition 0]
• 57-59. [Diagram sequence: scanning the distinct (persistence_id, partition_nr) pairs to discover persistence ids 0 and 1]
  • 60. private[query] class AllPersistenceIdsPublisher(...) extends QueryActorPublisher[String, AllPersistenceIdsState](...) { override protected def initialState: Future[AllPersistenceIdsState] = Future.successful(AllPersistenceIdsState(Set.empty)) override protected def updateState( state: AllPersistenceIdsState, row: Row): (Option[String], AllPersistenceIdsState) = { val event = row.getString("persistence_id") if (state.knownPersistenceIds.contains(event)) { (None, state) } else { (Some(event), state.copy(knownPersistenceIds = state.knownPersistenceIds + event)) } } }
• 62. Events by tag [Table: tagged events among the partitions; (0, 0) holds event 1 tag 1 and event 2 tag 1, (0, 1) holds event 100 tag 1, (1, 0) holds event 2 tag 1, with untagged events interleaved]
• 63-71. [Diagram sequence: the events carrying tag 1 (Id 0 events 1, 2 and 100; Id 1 event 2) are pulled out of all partitions and merged into a single tagged stream]
• 72. Events by tag SELECT * FROM $eventsByTagViewName$tagId WHERE tag$tagId = ? AND timebucket = ? AND timestamp > ? AND timestamp <= ? ORDER BY timestamp ASC LIMIT ? [Diagram: a materialized view partitions tag 1 by time bucket (1/1/2016, 1/2/2016) and yields Id 0 event 1, Id 0 event 2, Id 1 event 2, Id 0 event 100]
• 73-76. [Diagram sequence: the tag 1 view is read bucket by bucket (1/1/2016, then 1/2/2016), emitting Id 0 event 1, Id 0 event 2, Id 1 event 2 and Id 0 event 100 in timestamp order]
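The timebucket column is what keeps each read bounded: a tag's events are grouped into coarse time windows and the query walks those windows in order. A minimal sketch assuming day-sized buckets encoded as yyyyMMdd (the plugin's actual bucket format is not shown here):

import java.time.format.DateTimeFormatter
import java.time.{Instant, ZoneOffset}

// One bucket per day; a tag query issues one bounded read
// per (tag, timebucket) partition.
val bucketFormat = DateTimeFormatter.ofPattern("yyyyMMdd").withZone(ZoneOffset.UTC)

def timeBucket(timestampMillis: Long): String =
  bucketFormat.format(Instant.ofEpochMilli(timestampMillis))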
• 77-79. [Diagram, shown three times: the hierarchy of consistency models, from Strong Serializable, Serializable and Linearizable through Sequential, SI, Causal, RR, P-CI, PRAM (MR, MW, RYW), WFR, MAW, RC and CS down to Eventual Consistency]
• 80-83. [Diagram sequence: the events-by-tag query keeps a per-persistence_id sequence number table; after Id 0 event 1 (seq 1) it expects seq 2, so seeing event 100 next reveals a gap, and the missing event 2 must be found before the stream can proceed]
  • 84. def replay(): Unit = { val backtracking = isBacktracking val limit = if (backtracking) maxBufferSize else maxBufferSize - buf.size val toOffs = if (backtracking && abortDeadline.isEmpty) highestOffset else UUIDs.endOf(System.currentTimeMillis() - eventualConsistencyDelayMillis) context.actorOf(EventsByTagFetcher.props(tag, currTimeBucket, currOffset, toOffs, limit, backtracking, self, session, preparedSelect, seqNumbers, settings)) context.become(replaying(limit)) } def replaying(limit: Int): Receive = { case env @ UUIDPersistentRepr(offs, _) => // Deliver buffer case ReplayDone(count, seqN, highest) => // Request more case ReplayAborted(seqN, pid, expectedSeqNr, gotSeqNr) => // Causality violation, wait and retry. Only applicable if all events for persistence_id are tagged case ReplayFailed(cause) => // Failure case _: Request => // Deliver buffer case Continue => // Do nothing case Cancel => // Stop }
• 86. Akka Persistence Cassandra Replay def asyncReplayMessages(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long, max: Long) (replayCallback: (PersistentRepr) => Unit): Future[Unit] = Future { new MessageIterator(persistenceId, fromSequenceNr, toSequenceNr, max).foreach(msg => { replayCallback(msg) }) } class MessageIterator(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long, max: Long) extends Iterator[PersistentRepr] { private val initialFromSequenceNr = math.max(highestDeletedSequenceNumber(persistenceId) + 1, fromSequenceNr) private val iter = new RowIterator(persistenceId, initialFromSequenceNr, toSequenceNr) private var mcnt = 0L private var c: PersistentRepr = null private var n: PersistentRepr = PersistentRepr(Undefined) fetch() def hasNext: Boolean = ... def next(): PersistentRepr = ... }
• 89. class RowIterator(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long) extends Iterator[Row] { var currentPnr = partitionNr(fromSequenceNr) var currentSnr = fromSequenceNr var fromSnr = fromSequenceNr var toSnr = toSequenceNr var iter = newIter() def newIter() = session.execute(preparedSelectMessages.bind(persistenceId, currentPnr, fromSnr, toSnr)).iterator final def hasNext: Boolean = if (iter.hasNext) true else if (!inUse) false else { currentPnr += 1 fromSnr = currentSnr iter = newIter() hasNext } def next(): Row = { val row = iter.next() currentSnr = row.getLong("sequence_nr") row } }
  • 92. Non blocking asynchronous replay private[this] val queries: CassandraReadJournal = new CassandraReadJournal( extendedActorSystem, context.system.settings.config.getConfig("cassandra-query-journal")) override def asyncReplayMessages( persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long, max: Long)(replayCallback: (PersistentRepr) => Unit): Future[Unit] = queries .eventsByPersistenceId( persistenceId, fromSequenceNr, toSequenceNr, max, replayMaxResultSize, None, "asyncReplayMessages") .runForeach(replayCallback) .map(_ => ())
• 94. Benchmarks [Charts: replay time (ms) for blocking vs asynchronous implementations, plus strong and weak scaling of time (ms) against the number of actors and threads]
  • 95. my-dispatcher { type = "Dispatcher" executor = "thread-pool-executor" thread-pool-executor { fixed-pool-size = $fixedPoolSize } throughput = $throughput } my-dispatcher { type = "Dispatcher" executor = "fork-join-executor" fork-join-executor { parallelism-min = $parallelismMin parallelism-max = $parallelismMax parallelism-factor = $parallelismFactor } throughput = $throughput }
  • 96. cassandra-journal { plugin-dispatcher = $pluginDispatcher replay-dispatcher = $replayDispatcher max-result-size = $resultSize max-result-size-replay = $resultSizeReplay target-partition-size = $partitionSize } cassandra-query-journal { plugin-dispatcher = $queryPluginDispatcher max-buffer-size = $bufferSize max-result-size-query = $resultSizeReplay }
• 97. Alternative architecture [Diagram: one log per node (node_id 0, 1); events for persistence ids 0, 1 and 2 are appended to the node-local log in arrival order (persistence_id 0 events 0-3, persistence_id 1 event 0, persistence_id 2 event 0)]
• 98-99. [Diagrams: the node-local log entries are then indexed into the per-persistence_id stream, the tag 1 view and the allIds view]
• 100. [Diagram: tag 1 and allIds views updated together with the event log] val boundStatements = statementGroup(eventsByPersistenceId, eventsByTag, allPersistenceIds) Future.sequence(boundStatements).flatMap { stmts => val batch = new BatchStatement().setConsistencyLevel(...).setRetryPolicy(...) stmts.foreach(batch.add) session.underlying().flatMap(_.executeAsync(batch)) }
• 102. val eventsByPersistenceIdStatement = statementGroup(eventsByPersistenceId) val boundStatements = statementGroup(eventsByTagStatement, allPersistenceIdsStatement) ... session.underlying().flatMap { s => val ebpResult = s.executeAsync(eventsByPersistenceIdStatement) val batchResult = s.executeAsync(batch) ... } [Diagram: the events-by-persistence_id write is issued separately from the tag/allIds batch]
  • 104. Event time processing ● Ingestion time, processing time, event time
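The three clocks can disagree for one and the same event, which is why the choice matters for any windowed computation. A minimal sketch with a hypothetical Event type that carries its own event time:

// Hypothetical event carrying the time it happened at the source.
final case class Event(key: Int, eventTimeMillis: Long, value: Double)

// Processing time: whenever this node happens to see the event.
def processingTime(): Long = System.currentTimeMillis()

// Event-time windowing assigns the event to the window it occurred in,
// so a late arrival still lands in the right window.
def windowFor(e: Event, windowMillis: Long = 60000L): Long =
  e.eventTimeMillis / windowMillis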
• 106-107. Ordering [Diagram: three keyed events with times 12:34:56, 12:34:57 and 12:34:58 arrive out of order (1, 0, 2) and are reordered by time into 0, 1, 2]
• 108-119. Distributed causal stream merging [Diagram sequence: events for persistence ids 0, 1 and 2 land on different nodes (node_id 0, 1); the merge tracks the expected sequence number per persistence_id (0 advancing through seq 0, 1, 2, 3) and emits events in causal order; on replay, a durable (stream_id, seq) mapping lets the merged stream resume without re-reading everything]
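The merging rule the diagrams walk through can be sketched as: emit an event only when it carries the next expected sequence number for its persistence_id, and buffer anything that arrives early (class and member names below are assumptions):

import scala.collection.mutable

final case class Evt(persistenceId: String, seqNr: Long)

// Tracks the next expected sequence number per persistence_id and
// holds back out-of-order events until their predecessors arrive.
final class CausalMerge(emit: Evt => Unit) {
  private val expected = mutable.Map.empty[String, Long].withDefaultValue(0L)
  private val pending  = mutable.Map.empty[String, mutable.Map[Long, Evt]]

  def offer(e: Evt): Unit =
    if (e.seqNr == expected(e.persistenceId)) {
      emit(e)
      expected(e.persistenceId) = e.seqNr + 1
      // Release buffered successors that are now in order.
      val buf = pending.getOrElse(e.persistenceId, mutable.Map.empty[Long, Evt])
      var next = expected(e.persistenceId)
      while (buf.contains(next)) {
        emit(buf.remove(next).get)
        next += 1
        expected(e.persistenceId) = next
      }
    } else if (e.seqNr > expected(e.persistenceId)) {
      pending.getOrElseUpdate(e.persistenceId, mutable.Map.empty[Long, Evt]).update(e.seqNr, e)
    } // lower sequence numbers are duplicates of already-merged events: drop
}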
• 121-125. [Diagram sequence: the merged stream is delivered downstream with per-event acknowledgements; every event except Id 0 event 2 is ACKed, so Id 0 event 2 will be redelivered]
• 126-128. Exactly once delivery ● Durable offset [Diagram: positions 0-4 in the stream; the consumer durably records the offset of the last processed event, so after a restart processing resumes exactly after it instead of skipping or repeating events]
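A sketch of the durable-offset idea: store the processing result and the new offset in one atomic write, so that after a crash the stream resumes exactly where processing last succeeded (all names here are hypothetical):

import scala.concurrent.{ExecutionContext, Future}

trait OffsetStore {
  def readOffset(): Future[Long]
  // Persists result and offset atomically, e.g. in one batch/transaction.
  def writeResultAndOffset(result: String, offset: Long): Future[Unit]
}

def resume(store: OffsetStore,
           eventsFrom: Long => Iterator[(Long, String)])
          (implicit ec: ExecutionContext): Future[Unit] =
  store.readOffset().flatMap { from =>
    eventsFrom(from).foldLeft(Future.successful(())) { case (prev, (offset, event)) =>
      // Committing the result together with the offset turns at-least-once
      // delivery into effectively-exactly-once processing.
      prev.flatMap(_ => store.writeResultAndOffset(event.toUpperCase, offset + 1))
    }
  }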
• 130. [Diagram: per-node logs (node_id 0, 1) feeding the events-by-persistence_id stream, the tag 1 view and the allIds view]
• 131. Akka Analytics val conf = new SparkConf().setAppName("...").setMaster("...").set("spark.cassandra.connection.host", "...") val sc = new SparkContext(conf) implicit val ordering = new Ordering[(String, Double)] { override def compare(x: (String, Double), y: (String, Double)): Int = implicitly[Ordering[Double]].compare(x._2, y._2) } sc.eventTable() .cache() .flatMap { case (JournalKey(persistenceId, _, _), BalanceUpdatedEvent(change)) => (persistenceId -> change) :: Nil case _ => Nil } .reduceByKey(_ + _) .top(100) .foreach(println) sc.stop()
• 132. val conf = new SparkConf().setAppName("...").setMaster("...").set("spark.cassandra.connection.host", "...") val sc = new StreamingContext(conf, Seconds(5)) implicit val ordering = new Ordering[(String, Double)] { override def compare(x: (String, Double), y: (String, Double)): Int = implicitly[Ordering[Double]].compare(x._2, y._2) } sc.eventTable() .cache() .flatMap { case (JournalKey(persistenceId, _, _), BalanceUpdatedEvent(change)) => (persistenceId -> change) :: Nil case _ => Nil } .reduceByKey(_ + _) .top(100) .foreach(println) sc.start() sc.awaitTermination()
• 135. [Diagram: parameter-server architecture; clients 1-3 run updates on model devices over their input data and exchange P / ΔP with the parameter devices]
  • 136. Challenges ● All the solved problems ○ Exactly once delivery ○ Consistency ○ Availability ○ Fault tolerance ○ Cross service invariants and consistency ○ Transactions ○ Automated deployment and configuration management ○ Serialization, versioning, compatibility ○ Automated elasticity ○ No downtime version upgrades ○ Graceful shutdown of nodes ○ Distributed system verification, logging, tracing, monitoring, debugging ○ Split brains ○ ...
• 137. Conclusion ● From request-response, synchronous calls and mutable state ● To streams and asynchronous messaging ● Production-ready distributed systems
  • 139. MANCHESTER LONDON NEW YORK @zapletal_martin @cakesolutions 347 708 1518 [email protected] We are hiring http://www.cakesolutions.net/careers