qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CVE-2014-0147)

free_cluster_index is only correct if update_refcount() was called from
an allocation function, and even there it's brittle because it's used to
protect unfinished allocations which still have a refcount of 0 - if it
moves in the wrong place, the unfinished allocation can be corrupted.

So not using it any more seems to be a good idea. Instead, use the
first requested cluster to do the calculations. Return -EAGAIN if
unfinished allocations could become invalid and let the caller restart
its search for some free clusters.

The context of creating a snapsnot is one situation where
update_refcount() is called outside of a cluster allocation. For this
case, the change fixes a buffer overflow if a cluster is referenced in
an L2 table that cannot be represented by an existing refcount block.
(new_table[refcount_table_index] was out of bounds)

[Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to
11.
--Stefan]

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit b106ad9185)
[AF: BNC#870439; dropped QCOW2_DISCARD_NEVER argument to
     qcow2_free_clusters() and update_refcount(), dropped iotests]
Signed-off-by: Andreas Färber <afaerber@suse.de>
This commit is contained in:
Kevin Wolf
2014-03-28 18:06:31 +01:00
committed by Bruce Rogers
parent da96690d12
commit 176d3f3351
3 changed files with 45 additions and 42 deletions

View File

@@ -191,10 +191,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
* they can describe them themselves.
*
* - We need to consider that at this point we are inside update_refcounts
* and doing the initial refcount increase. This means that some clusters
* have already been allocated by the caller, but their refcount isn't
* accurate yet. free_cluster_index tells us where this allocation ends
* as long as we don't overwrite it by freeing clusters.
* and potentially doing an initial refcount increase. This means that
* some clusters have already been allocated by the caller, but their
* refcount isn't accurate yet. If we allocate clusters for metadata, we
* need to return -EAGAIN to signal the caller that it needs to restart
* the search for free clusters.
*
* - alloc_clusters_noref and qcow2_free_clusters may load a different
* refcount block into the cache
@@ -278,7 +279,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
s->refcount_table[refcount_table_index] = new_block;
return 0;
/* The new refcount block may be where the caller intended to put its
* data, so let it restart the search. */
return -EAGAIN;
}
ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
@@ -301,8 +305,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Calculate the number of refcount blocks needed so far */
uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
uint64_t blocks_used = (s->free_cluster_index +
refcount_block_clusters - 1) / refcount_block_clusters;
uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);
/* And now we need at least one block more for the new metadata */
uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
@@ -335,8 +338,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
s->refcount_table_size * sizeof(uint64_t));
@@ -399,17 +400,18 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table_size = table_size;
s->refcount_table_offset = table_offset;
/* Free old table. Remember, we must not change free_cluster_index */
uint64_t old_free_cluster_index = s->free_cluster_index;
/* Free old table. */
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
s->free_cluster_index = old_free_cluster_index;
ret = load_refcount_block(bs, new_block, (void**) refcount_block);
if (ret < 0) {
return ret;
}
return 0;
/* If we were trying to do the initial refcount update for some cluster
* allocation, we might have used the same clusters to store newly
* allocated metadata. Make the caller search some new space. */
return -EAGAIN;
fail_table:
g_free(new_table);
@@ -578,12 +580,15 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}
do {
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}
ret = update_refcount(bs, offset, size, 1);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, size, 1);
if (ret < 0) {
return ret;
}
@@ -596,32 +601,29 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_index;
uint64_t old_free_cluster_index;
int i, refcount, ret;
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
do {
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
}
}
}
/* And then allocate them */
old_free_cluster_index = s->free_cluster_index;
s->free_cluster_index = cluster_index + i;
/* And then allocate them */
ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
} while (ret == -EAGAIN);
ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
if (ret < 0) {
return ret;
}
s->free_cluster_index = old_free_cluster_index;
return i;
}

View File

@@ -1301,7 +1301,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
BlockDriverState* bs;
QCowHeader header;
uint8_t* refcount_table;
uint64_t* refcount_table;
int ret;
ret = bdrv_create_file(filename, options);
@@ -1343,9 +1343,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
/* Write an empty refcount table */
refcount_table = g_malloc0(cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);
if (ret < 0) {
@@ -1367,7 +1368,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
if (ret < 0) {
goto out;

View File

@@ -487,7 +487,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write
write failed: No space left on device
10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@@ -511,7 +511,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write
write failed: No space left on device
10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@@ -535,7 +535,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write
write failed: No space left on device
10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824