Skip to content

Commit

Permalink
KVM: Halt vcpu if page it tries to access is swapped out
Browse files Browse the repository at this point in the history
If a guest accesses swapped out memory do not swap it in from vcpu thread
context. Schedule work to do swapping and put vcpu into halted state
instead.

Interrupts will still be delivered to the guest and if interrupt will
cause reschedule guest will continue to run another task.

[avi: remove call to get_user_pages_noio(), nacked by Linus; this
      makes everything synchrnous again]

Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
  • Loading branch information
Gleb Natapov authored and avikivity committed Jan 12, 2011
1 parent 010c520 commit af585b9
Show file tree
Hide file tree
Showing 12 changed files with 570 additions and 16 deletions.
18 changes: 18 additions & 0 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,14 @@
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8

#define ASYNC_PF_PER_VCPU 64

extern spinlock_t kvm_lock;
extern struct list_head vm_list;

struct kvm_vcpu;
struct kvm;
struct kvm_async_pf;

enum kvm_reg {
VCPU_REGS_RAX = 0,
Expand Down Expand Up @@ -412,6 +415,11 @@ struct kvm_vcpu_arch {
u64 hv_vapic;

cpumask_var_t wbinvd_dirty_mask;

struct {
bool halted;
gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
} apf;
};

struct kvm_arch {
Expand Down Expand Up @@ -585,6 +593,10 @@ struct kvm_x86_ops {
const struct trace_print_flags *exit_reasons_str;
};

struct kvm_arch_async_pf {
gfn_t gfn;
};

extern struct kvm_x86_ops *kvm_x86_ops;

int kvm_mmu_module_init(void);
Expand Down Expand Up @@ -799,4 +811,10 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);

bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);

#endif /* _ASM_X86_KVM_HOST_H */
1 change: 1 addition & 0 deletions arch/x86/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
---help---
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kvm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o eventfd.o \
assigned-dev.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)

kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o timer.o
Expand Down
52 changes: 51 additions & 1 deletion arch/x86/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
*
*/

#include "irq.h"
#include "mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
#include "x86.h"

#include <linux/kvm_host.h>
#include <linux/types.h>
Expand Down Expand Up @@ -2587,6 +2589,50 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
error_code & PFERR_WRITE_MASK, gfn);
}

int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
{
struct kvm_arch_async_pf arch;
arch.gfn = gfn;

return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
}

static bool can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
kvm_event_needs_reinjection(vcpu)))
return false;

return kvm_x86_ops->interrupt_allowed(vcpu);
}

static bool try_async_pf(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
pfn_t *pfn)
{
bool async;

*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);

if (!async)
return false; /* *pfn has correct page already */

put_page(pfn_to_page(*pfn));

if (can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(async, *pfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
trace_kvm_async_pf_doublefault(gva, gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
return true;
} else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
return true;
}

*pfn = gfn_to_pfn(vcpu->kvm, gfn);

return false;
}

static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
u32 error_code)
{
Expand All @@ -2609,7 +2655,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,

mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn);

if (try_async_pf(vcpu, gfn, gpa, &pfn))
return 0;

/* mmio */
if (is_error_pfn(pfn))
return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
spin_lock(&vcpu->kvm->mmu_lock);
Expand Down
4 changes: 3 additions & 1 deletion arch/x86/kvm/paging_tmpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,

mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

if (try_async_pf(vcpu, walker.gfn, addr, &pfn))
return 0;

/* mmio */
if (is_error_pfn(pfn))
Expand Down
112 changes: 109 additions & 3 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
Expand Down Expand Up @@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {

u64 __read_mostly host_xcr0;

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
int i;
for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
vcpu->arch.apf.gfns[i] = ~0;
}

static void kvm_on_user_return(struct user_return_notifier *urn)
{
unsigned slot;
Expand Down Expand Up @@ -5115,6 +5123,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->fpu_active = 0;
kvm_x86_ops->fpu_deactivate(vcpu);
}
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
/* Page is swapped out. Do synthetic halt */
vcpu->arch.apf.halted = true;
r = 1;
goto out;
}
}

r = kvm_mmu_reload(vcpu);
Expand Down Expand Up @@ -5243,7 +5257,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)

r = 1;
while (r > 0) {
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
r = vcpu_enter_guest(vcpu);
else {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
Expand All @@ -5256,6 +5271,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
break;
case KVM_MP_STATE_SIPI_RECEIVED:
default:
Expand All @@ -5277,6 +5293,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
}

kvm_check_async_pf_completion(vcpu);

if (signal_pending(current)) {
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
Expand Down Expand Up @@ -5792,6 +5811,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)

kvm_make_request(KVM_REQ_EVENT, vcpu);

kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;

return kvm_x86_ops->vcpu_reset(vcpu);
}

Expand Down Expand Up @@ -5880,6 +5903,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
goto fail_free_mce_banks;

kvm_async_pf_hash_reset(vcpu);

return 0;
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
Expand Down Expand Up @@ -5938,8 +5963,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
/*
* Unpin any mmu pages first.
*/
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_unload_vcpu_mmu(vcpu);
}
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);

Expand Down Expand Up @@ -6050,7 +6077,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm)

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|| vcpu->arch.nmi_pending ||
(kvm_arch_interrupt_allowed(vcpu) &&
Expand Down Expand Up @@ -6109,6 +6138,83 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);

static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}

static inline u32 kvm_async_pf_next_probe(u32 key)
{
return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
}

static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u32 key = kvm_async_pf_hash_fn(gfn);

while (vcpu->arch.apf.gfns[key] != ~0)
key = kvm_async_pf_next_probe(key);

vcpu->arch.apf.gfns[key] = gfn;
}

static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
int i;
u32 key = kvm_async_pf_hash_fn(gfn);

for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
(vcpu->arch.apf.gfns[key] != gfn ||
vcpu->arch.apf.gfns[key] == ~0); i++)
key = kvm_async_pf_next_probe(key);

return key;
}

bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
}

static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u32 i, j, k;

i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
while (true) {
vcpu->arch.apf.gfns[i] = ~0;
do {
j = kvm_async_pf_next_probe(j);
if (vcpu->arch.apf.gfns[j] == ~0)
return;
k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
/*
* k lies cyclically in ]i,j]
* | i.k.j |
* |....j i.k.| or |.k..j i...|
*/
} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
i = j;
}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_async_pf_not_present(work->gva);

kvm_make_request(KVM_REQ_APF_HALT, vcpu);
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_async_pf_ready(work->gva);
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
}

EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
Expand Down
Loading

0 comments on commit af585b9

Please sign in to comment.