[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v5 14/21] migration: Create the postcopy preempt channel asyn
From: |
Dr. David Alan Gilbert |
Subject: |
Re: [PATCH v5 14/21] migration: Create the postcopy preempt channel asynchronously |
Date: |
Thu, 12 May 2022 18:35:40 +0100 |
User-agent: |
Mutt/2.2.1 (2022-02-19) |
* Peter Xu ([email protected]) wrote:
> This patch allows the postcopy preempt channel to be created
> asynchronously. The benefit is that when the connection is slow, we won't
> take the BQL (and potentially block all things like QMP) for a long time
> without releasing.
>
> A function postcopy_preempt_wait_channel() is introduced, allowing the
> migration thread to be able to wait on the channel creation. The channel
> is always created by the main thread, in which we'll kick a new semaphore
> to tell the migration thread that the channel has created.
>
> We'll need to wait for the new channel in two places: (1) when there's a
> new postcopy migration that is starting, or (2) when there's a postcopy
> migration to resume.
>
> For the start of migration, we don't need to wait for this channel until
> when we want to start postcopy, aka, postcopy_start(). We'll fail the
> migration if we found that the channel creation failed (which should
> probably not happen at all in 99% of the cases, because the main channel is
> using the same network topology).
>
> For a postcopy recovery, we'll need to wait in postcopy_pause(). In that
> case if the channel creation failed, we can't fail the migration or we'll
> crash the VM, instead we keep in PAUSED state, waiting for yet another
> recovery.
>
> Signed-off-by: Peter Xu <[email protected]>
Reviewed-by: Dr. David Alan Gilbert <[email protected]>
> ---
> migration/migration.c | 16 ++++++++++++
> migration/migration.h | 7 +++++
> migration/postcopy-ram.c | 56 +++++++++++++++++++++++++++++++---------
> migration/postcopy-ram.h | 1 +
> 4 files changed, 68 insertions(+), 12 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index a0db5de685..cce741e20e 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3020,6 +3020,12 @@ static int postcopy_start(MigrationState *ms)
> int64_t bandwidth = migrate_max_postcopy_bandwidth();
> bool restart_block = false;
> int cur_state = MIGRATION_STATUS_ACTIVE;
> +
> + if (postcopy_preempt_wait_channel(ms)) {
> + migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
> + return -1;
> + }
> +
> if (!migrate_pause_before_switchover()) {
> migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
> MIGRATION_STATUS_POSTCOPY_ACTIVE);
> @@ -3501,6 +3507,14 @@ static MigThrError postcopy_pause(MigrationState *s)
> if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
> /* Woken up by a recover procedure. Give it a shot */
>
> + if (postcopy_preempt_wait_channel(s)) {
> + /*
> + * Preempt enabled, and new channel create failed; loop
> + * back to wait for another recovery.
> + */
> + continue;
> + }
> +
> /*
> * Firstly, let's wake up the return path now, with a new
> * return path channel.
> @@ -4360,6 +4374,7 @@ static void migration_instance_finalize(Object *obj)
> qemu_sem_destroy(&ms->postcopy_pause_sem);
> qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
> qemu_sem_destroy(&ms->rp_state.rp_sem);
> + qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
> error_free(ms->error);
> }
>
> @@ -4406,6 +4421,7 @@ static void migration_instance_init(Object *obj)
> qemu_sem_init(&ms->rp_state.rp_sem, 0);
> qemu_sem_init(&ms->rate_limit_sem, 0);
> qemu_sem_init(&ms->wait_unplug_sem, 0);
> + qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
> qemu_mutex_init(&ms->qemu_file_lock);
> }
>
> diff --git a/migration/migration.h b/migration/migration.h
> index 91f845e9e4..f898b8547a 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -219,6 +219,13 @@ struct MigrationState {
> QEMUFile *to_dst_file;
> /* Postcopy specific transfer channel */
> QEMUFile *postcopy_qemufile_src;
> + /*
> + * It is posted when the preempt channel is established. Note: this is
> + * used for both the start or recover of a postcopy migration. We'll
> + * post to this sem every time a new preempt channel is created in the
> + * main thread, and we keep post() and wait() in pair.
> + */
> + QemuSemaphore postcopy_qemufile_src_sem;
> QIOChannelBuffer *bioc;
> /*
> * Protects to_dst_file/from_dst_file pointers. We need to make sure we
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index b3c81b46f6..1bb603051a 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -1552,10 +1552,50 @@ bool
> postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
> return true;
> }
>
> -int postcopy_preempt_setup(MigrationState *s, Error **errp)
> +static void
> +postcopy_preempt_send_channel_new(QIOTask *task, gpointer opaque)
> {
> - QIOChannel *ioc;
> + MigrationState *s = opaque;
> + QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
> + Error *local_err = NULL;
> +
> + if (qio_task_propagate_error(task, &local_err)) {
> + /* Something wrong happened.. */
> + migrate_set_error(s, local_err);
> + error_free(local_err);
> + } else {
> + migration_ioc_register_yank(ioc);
> + s->postcopy_qemufile_src = qemu_fopen_channel_output(ioc);
> + trace_postcopy_preempt_new_channel();
> + }
> +
> + /*
> + * Kick the waiter in all cases. The waiter should check upon
> + * postcopy_qemufile_src to know whether it failed or not.
> + */
> + qemu_sem_post(&s->postcopy_qemufile_src_sem);
> + object_unref(OBJECT(ioc));
> +}
>
> +/* Returns 0 if channel established, -1 for error. */
> +int postcopy_preempt_wait_channel(MigrationState *s)
> +{
> + /* If preempt not enabled, no need to wait */
> + if (!migrate_postcopy_preempt()) {
> + return 0;
> + }
> +
> + /*
> + * We need the postcopy preempt channel to be established before
> + * starting doing anything.
> + */
> + qemu_sem_wait(&s->postcopy_qemufile_src_sem);
> +
> + return s->postcopy_qemufile_src ? 0 : -1;
> +}
> +
> +int postcopy_preempt_setup(MigrationState *s, Error **errp)
> +{
> if (!migrate_postcopy_preempt()) {
> return 0;
> }
> @@ -1566,16 +1606,8 @@ int postcopy_preempt_setup(MigrationState *s, Error
> **errp)
> return -1;
> }
>
> - ioc = socket_send_channel_create_sync(errp);
> -
> - if (ioc == NULL) {
> - return -1;
> - }
> -
> - migration_ioc_register_yank(ioc);
> - s->postcopy_qemufile_src = qemu_fopen_channel_output(ioc);
> -
> - trace_postcopy_preempt_new_channel();
> + /* Kick an async task to connect */
> + socket_send_channel_create(postcopy_preempt_send_channel_new, s);
>
> return 0;
> }
> diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
> index 34b1080cde..6147bf7d1d 100644
> --- a/migration/postcopy-ram.h
> +++ b/migration/postcopy-ram.h
> @@ -192,5 +192,6 @@ enum PostcopyChannels {
>
> bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile
> *file);
> int postcopy_preempt_setup(MigrationState *s, Error **errp);
> +int postcopy_preempt_wait_channel(MigrationState *s);
>
> #endif
> --
> 2.32.0
>
--
Dr. David Alan Gilbert / [email protected] / Manchester, UK
- Re: [PATCH v5 14/21] migration: Create the postcopy preempt channel asynchronously,
Dr. David Alan Gilbert <=