event_notifier_test_and_clear must be called before processing events.
Otherwise, an aio_poll could "eat" the notification before the main
I/O thread invokes ppoll().  The main I/O thread then never wakes up.
This is an example of what could happen:
   i/o thread       vcpu thread                     worker thread
   ---------------------------------------------------------------------
   lock_iothread
   notify_me = 1
   ...
   unlock_iothread
                                                     bh->scheduled = 1
                                                     event_notifier_set
                    lock_iothread
                    notify_me = 3
                    ppoll
                    notify_me = 1
                    aio_dispatch
                     aio_bh_poll
                      thread_pool_completion_bh
                                                     bh->scheduled = 1
                                                     event_notifier_set
                     node->io_read(node->opaque)
                      event_notifier_test_and_clear
   ppoll
   *** hang ***
"Tracing" with qemu_clock_get_ns shows pretty much the same behavior as
in the previous bug, so there are no new tricks here---just stare more
at the code until it is apparent.
One could also use a formal model, of course.  The included one shows
this with three processes: notifier corresponds to a QEMU thread pool
worker, temporary_waiter to a VCPU thread that invokes aio_poll(),
waiter to the main I/O thread.  I would be happy to say that the
formal model found the bug for me, but actually I wrote it after the
fact.
This patch is a bit of a big hammer.  The next one optimizes it,
with help (this time for real rather than a posteriori :)) from
another, similar formal model.
Reported-by: Richard W. M. Jones <rjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Tested-by: Richard W.M. Jones <rjones@redhat.com>
Message-id: 1437487673-23740-6-git-send-email-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
		
	
		
			
				
	
	
		
			141 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Promela
		
	
	
	
	
	
			
		
		
	
	
			141 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Promela
		
	
	
	
	
	
| /*
 | |
|  * This model describes a bug in aio_notify.  If ctx->notifier is
 | |
|  * cleared too late, a wakeup could be lost.
 | |
|  *
 | |
|  * Author: Paolo Bonzini <pbonzini@redhat.com>
 | |
|  *
 | |
|  * This file is in the public domain.  If you really want a license,
 | |
|  * the WTFPL will do.
 | |
|  *
 | |
|  * To verify the buggy version:
 | |
|  *     spin -a -DBUG docs/aio_notify_bug.promela
 | |
|  *     gcc -O2 pan.c
 | |
|  *     ./a.out -a -f
 | |
|  *
 | |
|  * To verify the fixed version:
 | |
|  *     spin -a docs/aio_notify_bug.promela
 | |
|  *     gcc -O2 pan.c
 | |
|  *     ./a.out -a -f
 | |
|  *
 | |
|  * Add -DCHECK_REQ to test an alternative invariant and the
 | |
|  * "notify_me" optimization.
 | |
|  */
 | |
| 
 | |
| int notify_me;
 | |
| bool event;
 | |
| bool req;
 | |
| bool notifier_done;
 | |
| 
 | |
| #ifdef CHECK_REQ
 | |
| #define USE_NOTIFY_ME 1
 | |
| #else
 | |
| #define USE_NOTIFY_ME 0
 | |
| #endif
 | |
| 
 | |
| active proctype notifier()
 | |
| {
 | |
|     do
 | |
|         :: true -> {
 | |
|             req = 1;
 | |
|             if
 | |
|                :: !USE_NOTIFY_ME || notify_me -> event = 1;
 | |
|                :: else -> skip;
 | |
|             fi
 | |
|         }
 | |
|         :: true -> break;
 | |
|     od;
 | |
|     notifier_done = 1;
 | |
| }
 | |
| 
 | |
| #ifdef BUG
 | |
| #define AIO_POLL                                                    \
 | |
|     notify_me++;                                                    \
 | |
|     if                                                              \
 | |
|         :: !req -> {                                                \
 | |
|             if                                                      \
 | |
|                 :: event -> skip;                                   \
 | |
|             fi;                                                     \
 | |
|         }                                                           \
 | |
|         :: else -> skip;                                            \
 | |
|     fi;                                                             \
 | |
|     notify_me--;                                                    \
 | |
|                                                                     \
 | |
|     req = 0;                                                        \
 | |
|     event = 0;
 | |
| #else
 | |
| #define AIO_POLL                                                    \
 | |
|     notify_me++;                                                    \
 | |
|     if                                                              \
 | |
|         :: !req -> {                                                \
 | |
|             if                                                      \
 | |
|                 :: event -> skip;                                   \
 | |
|             fi;                                                     \
 | |
|         }                                                           \
 | |
|         :: else -> skip;                                            \
 | |
|     fi;                                                             \
 | |
|     notify_me--;                                                    \
 | |
|                                                                     \
 | |
|     event = 0;                                                      \
 | |
|     req = 0;
 | |
| #endif
 | |
| 
 | |
| active proctype waiter()
 | |
| {
 | |
|     do
 | |
|        :: true -> AIO_POLL;
 | |
|     od;
 | |
| }
 | |
| 
 | |
| /* Same as waiter(), but disappears after a while.  */
 | |
| active proctype temporary_waiter()
 | |
| {
 | |
|     do
 | |
|        :: true -> AIO_POLL;
 | |
|        :: true -> break;
 | |
|     od;
 | |
| }
 | |
| 
 | |
| #ifdef CHECK_REQ
 | |
| never {
 | |
|     do
 | |
|         :: req -> goto accept_if_req_not_eventually_false;
 | |
|         :: true -> skip;
 | |
|     od;
 | |
| 
 | |
| accept_if_req_not_eventually_false:
 | |
|     if
 | |
|         :: req -> goto accept_if_req_not_eventually_false;
 | |
|     fi;
 | |
|     assert(0);
 | |
| }
 | |
| 
 | |
| #else
 | |
| /* There must be infinitely many transitions of event as long
 | |
|  * as the notifier does not exit.
 | |
|  *
 | |
|  * If event stayed always true, the waiters would be busy looping.
 | |
|  * If event stayed always false, the waiters would be sleeping
 | |
|  * forever.
 | |
|  */
 | |
| never {
 | |
|     do
 | |
|         :: !event    -> goto accept_if_event_not_eventually_true;
 | |
|         :: event     -> goto accept_if_event_not_eventually_false;
 | |
|         :: true      -> skip;
 | |
|     od;
 | |
| 
 | |
| accept_if_event_not_eventually_true:
 | |
|     if
 | |
|         :: !event && notifier_done  -> do :: true -> skip; od;
 | |
|         :: !event && !notifier_done -> goto accept_if_event_not_eventually_true;
 | |
|     fi;
 | |
|     assert(0);
 | |
| 
 | |
| accept_if_event_not_eventually_false:
 | |
|     if
 | |
|         :: event     -> goto accept_if_event_not_eventually_false;
 | |
|     fi;
 | |
|     assert(0);
 | |
| }
 | |
| #endif
 |