# HG changeset patch # User Keir Fraser # Date 1278093394 -3600 # Node ID ae68758f8862bc43ab6bbe4ad3a8594c28b9bc39 # Parent 19f4d637a52b8723ac1fbcf666c146951bee8e57 trace: fix security issues After getting a report of 3.2.3's xenmon crashing Xen (as it turned out this was because c/s 17000 was backported to that tree without also applying c/s 17515), I figured that the hypervisor shouldn't rely on any specific state of the actual trace buffer (as it is shared writable with Dom0) [GWD: Volatile quantifiers have been taken out and moved to another patch] To make clear what purpose specific variables have and/or where they got loaded from, the patch also changes the type of some of them to be explicitly u32/s32, and removes pointless assertions (like checking an unsigned variable to be >= 0). I also took the prototype adjustment of __trace_var() as an opportunity to simplify the TRACE_xD() macros. Similar simplification could be done on the (quite numerous) direct callers of the function. Signed-off-by: Jan Beulich Signed-off-by: George Dunlap # HG changeset patch # User Keir Fraser # Date 1278314658 -3600 # Node ID 9074d50d09358cd8349d54c7ab2e2ead81fa1570 # Parent f483b5ce7be235494156fee164decd73e0472cb7 trace: insert compiler memory barriers This is to ensure fields shared writably with Dom0 get read only once for any consistency checking followed by actual calculations. I realized there was another multiple-read issue, a fix for which is also included (which at once simplifies __insert_record()). Signed-off-by: Jan Beulich --- a/xen/common/trace.c +++ b/xen/common/trace.c @@ -52,12 +52,12 @@ static struct t_info *t_info; static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); -static int data_size; +static u32 data_size; static u32 t_info_first_offset __read_mostly; /* High water mark for trace buffers; */ /* Send virtual interrupt when buffer level reaches this point */ -static int t_buf_highwater; +static u32 t_buf_highwater; /* Number of records lost due to per-CPU trace buffer being full. */ static DEFINE_PER_CPU(unsigned long, lost_records); @@ -162,7 +162,7 @@ static int alloc_trace_bufs(void) spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); - buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf; + per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf; buf->cons = buf->prod = 0; per_cpu(t_data, cpu) = (unsigned char *)(buf + 1); @@ -213,6 +213,7 @@ out_dealloc: spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) ) { + per_cpu(t_bufs, cpu) = NULL; ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated)); free_xenheap_pages(rawbuf, order); } @@ -418,19 +419,39 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc return rc; } -static inline int calc_rec_size(int cycles, int extra) +static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) { - int rec_size; - rec_size = 4; + unsigned int rec_size = 4; + if ( cycles ) rec_size += 8; rec_size += extra; return rec_size; } -static inline int calc_unconsumed_bytes(struct t_buf *buf) +static inline bool_t bogus(u32 prod, u32 cons) { - int x = buf->prod - buf->cons; + if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) || + unlikely(cons & 3) || unlikely(cons >= 2 * data_size) ) + { + tb_init_done = 0; + printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n", + smp_processor_id(), prod, cons); + return 1; + } + return 0; +} + +static inline u32 calc_unconsumed_bytes(const struct t_buf *buf) +{ + u32 prod = buf->prod, cons = buf->cons; + s32 x; + + barrier(); /* must read buf->prod and buf->cons only once */ + if ( bogus(prod, cons) ) + return data_size; + + x = prod - cons; if ( x < 0 ) x += 2*data_size; @@ -440,9 +461,16 @@ static inline int calc_unconsumed_bytes( return x; } -static inline int calc_bytes_to_wrap(struct t_buf *buf) +static inline u32 calc_bytes_to_wrap(const struct t_buf *buf) { - int x = data_size - buf->prod; + u32 prod = buf->prod, cons = buf->cons; + s32 x; + + barrier(); /* must read buf->prod and buf->cons only once */ + if ( bogus(prod, cons) ) + return 0; + + x = data_size - prod; if ( x <= 0 ) x += data_size; @@ -452,55 +480,60 @@ static inline int calc_bytes_to_wrap(str return x; } -static inline int calc_bytes_avail(struct t_buf *buf) +static inline u32 calc_bytes_avail(const struct t_buf *buf) { return data_size - calc_unconsumed_bytes(buf); } -static inline struct t_rec * -next_record(struct t_buf *buf) +static inline struct t_rec *next_record(const struct t_buf *buf, + uint32_t *next) { - int x = buf->prod; + u32 x = buf->prod, cons = buf->cons; + + barrier(); /* must read buf->prod and buf->cons only once */ + *next = x; + if ( !tb_init_done || bogus(x, cons) ) + return NULL; + if ( x >= data_size ) x -= data_size; - ASSERT(x >= 0); ASSERT(x < data_size); return (struct t_rec *)&this_cpu(t_data)[x]; } -static inline int __insert_record(struct t_buf *buf, - unsigned long event, - int extra, - int cycles, - int rec_size, - unsigned char *extra_data) +static inline void __insert_record(struct t_buf *buf, + unsigned long event, + unsigned int extra, + bool_t cycles, + unsigned int rec_size, + const void *extra_data) { struct t_rec *rec; unsigned char *dst; - unsigned long extra_word = extra/sizeof(u32); - int local_rec_size = calc_rec_size(cycles, extra); + unsigned int extra_word = extra / sizeof(u32); + unsigned int local_rec_size = calc_rec_size(cycles, extra); uint32_t next; BUG_ON(local_rec_size != rec_size); BUG_ON(extra & 3); + rec = next_record(buf, &next); + if ( !rec ) + return; /* Double-check once more that we have enough space. * Don't bugcheck here, in case the userland tool is doing * something stupid. */ - next = calc_bytes_avail(buf); - if ( next < rec_size ) + if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size ) { if ( printk_ratelimit() ) printk(XENLOG_WARNING - "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n", - __func__, next, data_size, buf->prod, buf->cons, rec_size); - return 0; + "%s: size=%08x prod=%08x cons=%08x rec=%u\n", + __func__, data_size, next, buf->cons, rec_size); + return; } - rmb(); - rec = next_record(buf); rec->event = event; rec->extra_u32 = extra_word; dst = (unsigned char *)rec->u.nocycles.extra_u32; @@ -517,21 +550,19 @@ static inline int __insert_record(struct wmb(); - next = buf->prod + rec_size; + next += rec_size; if ( next >= 2*data_size ) next -= 2*data_size; - ASSERT(next >= 0); ASSERT(next < 2*data_size); buf->prod = next; - - return rec_size; } -static inline int insert_wrap_record(struct t_buf *buf, int size) +static inline void insert_wrap_record(struct t_buf *buf, + unsigned int size) { - int space_left = calc_bytes_to_wrap(buf); - unsigned long extra_space = space_left - sizeof(u32); - int cycles = 0; + u32 space_left = calc_bytes_to_wrap(buf); + unsigned int extra_space = space_left - sizeof(u32); + bool_t cycles = 0; BUG_ON(space_left > size); @@ -543,17 +574,13 @@ static inline int insert_wrap_record(str ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX); } - return __insert_record(buf, - TRC_TRACE_WRAP_BUFFER, - extra_space, - cycles, - space_left, - NULL); + __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles, + space_left, NULL); } #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */ -static inline int insert_lost_records(struct t_buf *buf) +static inline void insert_lost_records(struct t_buf *buf) { struct { u32 lost_records; @@ -568,12 +595,8 @@ static inline int insert_lost_records(st this_cpu(lost_records) = 0; - return __insert_record(buf, - TRC_LOST_RECORDS, - sizeof(ed), - 1 /* cycles */, - LOST_REC_SIZE, - (unsigned char *)&ed); + __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */, + LOST_REC_SIZE, &ed); } /* @@ -595,13 +618,15 @@ static DECLARE_TASKLET(trace_notify_dom0 * failure, otherwise 0. Failure occurs only if the trace buffers are not yet * initialised. */ -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) +void __trace_var(u32 event, bool_t cycles, unsigned int extra, + const void *extra_data) { struct t_buf *buf; - unsigned long flags, bytes_to_tail, bytes_to_wrap; - int rec_size, total_size; - int extra_word; - int started_below_highwater = 0; + unsigned long flags; + u32 bytes_to_tail, bytes_to_wrap; + unsigned int rec_size, total_size; + unsigned int extra_word; + bool_t started_below_highwater; if( !tb_init_done ) return; @@ -640,7 +665,11 @@ void __trace_var(u32 event, int cycles, buf = this_cpu(t_bufs); if ( unlikely(!buf) ) + { + /* Make gcc happy */ + started_below_highwater = 0; goto unlock; + } started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); @@ -721,8 +750,9 @@ unlock: spin_unlock_irqrestore(&this_cpu(t_lock), flags); /* Notify trace buffer consumer that we've crossed the high water mark. */ - if ( started_below_highwater && - (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) + if ( likely(buf!=NULL) + && started_below_highwater + && (calc_unconsumed_bytes(buf) >= t_buf_highwater) ) tasklet_schedule(&trace_notify_dom0_tasklet); } --- a/xen/include/xen/trace.h +++ b/xen/include/xen/trace.h @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op int trace_will_trace_event(u32 event); -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data); +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *); static inline void trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) @@ -57,7 +57,7 @@ static inline void trace_var(u32 event, { \ u32 _d[1]; \ _d[0] = d1; \ - __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -68,7 +68,7 @@ static inline void trace_var(u32 event, u32 _d[2]; \ _d[0] = d1; \ _d[1] = d2; \ - __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -80,7 +80,7 @@ static inline void trace_var(u32 event, _d[0] = d1; \ _d[1] = d2; \ _d[2] = d3; \ - __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -93,7 +93,7 @@ static inline void trace_var(u32 event, _d[1] = d2; \ _d[2] = d3; \ _d[3] = d4; \ - __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -107,7 +107,7 @@ static inline void trace_var(u32 event, _d[2] = d3; \ _d[3] = d4; \ _d[4] = d5; \ - __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -122,7 +122,7 @@ static inline void trace_var(u32 event, _d[3] = d4; \ _d[4] = d5; \ _d[5] = d6; \ - __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 )