tgt/tgt-git-update

1327 lines
37 KiB
Plaintext

diff --git a/doc/targets.conf.example b/doc/targets.conf.example
index 46be8fe..ac8cf69 100644
--- a/doc/targets.conf.example
+++ b/doc/targets.conf.example
@@ -81,6 +81,7 @@ default-driver iscsi
# Note that some parameters (write-cache, scsi_sn) were specified "globally".
# "Global" parameters will be applied to all LUNs; they can be overwritten
# "locally", per LUN.
+# If lun is not specified, it will be allocated automatically (first available).
<target iqn.2008-09.com.example:server.target5>
@@ -88,26 +89,26 @@ default-driver iscsi
vendor_id VENDOR1
removable 1
device-type cd
- # lun 1 # Not yet supported
+ lun 1
</direct-store>
<direct-store /dev/sda>
vendor_id VENDOR2
- # lun 2 # Not yet supported
+ lun 2
</direct-store>
<backing-store /dev/sdb1>
vendor_id back1
scsi_sn SERIAL
write-cache on
- # lun 3 # Not yet supported
+ # lun 3 # lun is commented out - will be allocated automatically
</backing-store>
<backing-store /dev/sdd1>
vendor_id back2
#mode_page 8:0:18:0x10:0:0xff....
#mode_page 8:0:18:0x10:0:0xff....
- # lun 4 # Not yet supported
+ lun 15
</backing-store>
# Some more parameters which can be specified locally or globally:
@@ -123,11 +124,12 @@ default-driver iscsi
#mode_page 8:0:18:0x10:0:0xff....
#mode_page 8:0:18:0x10:0:0xff....
#device-type
+ #allow-in-use # if specified globally, can't be overwritten locally
write-cache off
scsi_sn multipath-10
- # Parameters below are global. They can't be configured per LUN.
+ # Parameters below are only global. They can't be configured per LUN.
# Only allow connections from 192.168.100.1 and 192.168.200.5
initiator-address 192.168.100.1
initiator-address 192.168.200.5
@@ -142,10 +144,47 @@ default-driver iscsi
</target>
+# The device would get lun 1 by default; here lun 10 is requested explicitly
+<target iqn.2008-09.com.example:server.target6>
+ backing-store /dev/LVM/somedevice
+ lun 10
+</target>
+
+
+# Devices which are in use (by the system: mounted, used for swap, part of a
+# RAID array; or by userspace: dd, tgtd serving another target, etc.) can't be
+# used unless you pass the --force flag or add the 'allow-in-use yes' option.
+<target iqn.2008-09.com.example:server.target7>
+ backing-store /dev/LVM/somedevice
+ allow-in-use yes
+</target>
+
+<target iqn.2008-09.com.example:server.target8>
+ <backing-store /dev/LVM/somedevice>
+ scsi_sn serial1
+ </backing-store>
+
+ <backing-store /dev/LVM/somedevice2>
+ scsi_sn serial2
+ </backing-store>
+
+ allow-in-use yes
+</target>
+
+
+
+
# Not supported configurations, and therefore, commented out:
-#<target iqn.2008-09.com.example:server.target6>
+#<target iqn.2008-09.com.example:server.target9>
+# backing-store /dev/LVM/somedevice1
+# backing-store /dev/LVM/somedevice2
+# lun 10
+# lun 11
+#</target>
+
+#<target iqn.2008-09.com.example:server.target10>
# <direct-store /dev/sdd>
# vendor_id VENDOR1
# </direct-store>
@@ -155,7 +194,7 @@ default-driver iscsi
# This one will break the parser:
-#<target iqn.2008-09.com.example:server.target7>
+#<target iqn.2008-09.com.example:server.target11>
# <direct-store /dev/sdd>
# vendor_id VENDOR1
# </direct-store>
diff --git a/scripts/tgt-admin b/scripts/tgt-admin
index e4be373..c352952 100755
--- a/scripts/tgt-admin
+++ b/scripts/tgt-admin
@@ -125,7 +125,7 @@ sub process_targets {
sub parse_configs {
# Parse the config
if ($alternate_conf ne 0) {
- # Check if alternative configuration file exist
+ # Check if alternative configuration file exists
if (-e "$alternate_conf") {
execute("# Using $alternate_conf as configuration file\n");
%conf = ParseConfig(-ConfigFile => "$alternate_conf", -UseApacheInclude => 1, -IncludeGlob => 1,);
@@ -211,6 +211,7 @@ sub add_targets {
# and other parameters which can be specified globally
my %target_options;
my $target_options_ref;
+ my $data_key;
foreach my $k3 (sort keys %{$conf{$k}{$k2}}) {
$lun = 1;
$option = $k3;
@@ -218,6 +219,7 @@ sub add_targets {
check_value($value);
$target_options{$option} = $value;
$target_options_ref = \%target_options;
+ $data_key = make_key($target_options_ref, "lun", "allow-in-use");
}
if (not defined $target_options{"driver"}) {
@@ -230,7 +232,7 @@ sub add_targets {
$option = $k3;
$value = $conf{$k}{$k2}{$k3};
check_value($value);
- process_options($target_options_ref);
+ process_options($target_options_ref,$data_key);
# If there was no option called "initiator-address", it means
# we want to allow ALL initiators for this target
if ($option eq "initiator-address") {
@@ -258,6 +260,27 @@ sub add_targets {
}
}
+# Pre-parse the config: collect the requested per-device options
+# (e.g. "lun", "allow-in-use") before the devices themselves are processed.
+# Returns a reference to { device => { option => value-or-undef } }.
+sub make_key {
+	my ($target_options_ref, @actions) = @_;
+	my %data_key;
+
+	# Visit both store kinds; if a device shows up under both, the
+	# direct-store value is written last and therefore wins.
+	foreach my $kind ('backing-store', 'direct-store') {
+		my $stores = $$target_options_ref{$kind};
+		# Skip anything that is not a hash of devices
+		next if ref($stores) ne "HASH";
+		foreach my $device (keys %$stores) {
+			foreach my $action (@actions) {
+				$data_key{$device}{$action} = $$stores{$device}{$action};
+			}
+		}
+	}
+	return \%data_key;
+}
+
# Some options can be specified only once
sub check_if_hash_array {
my $check = $_[0];
@@ -285,9 +308,15 @@ sub check_exe {
foreach my $path (@path) {
if ( -x "$path/$command" && -f "$path/$command" ) { $exists = 1 }
}
- if ( $exists == 0 ) {
- print "Command $command (needed by $option option in your config file) is not in your path - can't continue!\n";
- exit 1;
+ if ($exists == 0) {
+ if ($command eq "sg_inq") {
+ print "Command '$command' (needed by '$option') is not in your path - can't continue!\n";
+ exit 1;
+ } elsif ($command eq "lsof") {
+ execute("# Command '$command' is not in your path.");
+ execute("# Can't reliably check if device is not in use.");
+ return 1;
+ }
}
}
@@ -315,27 +344,61 @@ sub add_params {
}
}
+# Find next available LUN
+# Returns the lowest LUN >= 1 that no device in $data_key_ref has claimed
+# yet, and records it as the LUN of $backing_store.
+sub find_next_lun {
+	my $backing_store = $_[0];	# device that needs a LUN
+	my $data_key_ref = $_[1];	# per-device options built by make_key()
+
+	# Collect the LUNs that are already claimed. Only the 'lun' entry is
+	# consulted: other stored options (e.g. 'allow-in-use') are not LUN
+	# numbers and must not make a free LUN look taken.
+	my %taken;
+	foreach my $device (keys %$data_key_ref) {
+		my $claimed = $$data_key_ref{$device}{'lun'};
+		$taken{$claimed} = 1 if defined $claimed && length $claimed;
+	}
+
+	# Automatic allocation starts at 1 and walks up to the first gap
+	my $found_lun = 1;
+	$found_lun += 1 while $taken{$found_lun};
+
+	# Record the choice so that later calls do not hand out the same LUN
+	$$data_key_ref{$backing_store}{'lun'} = $found_lun;
+	return $found_lun;
+}
+
# Add backing or direct store
sub add_backing_direct {
my $backing_store = $_[0];
my $target_options_ref = $_[1];
- my $lun = $_[2];
+ my $lun;
+ my $data_key_ref = $_[2];
my $direct_store = $_[3];
my $driver = $$target_options_ref{"driver"};
# Is the device in use?
- (my $can_alloc, my $dev) = check_device($backing_store);
+ my $can_alloc = 1;
+ if ($force != 1 && $$target_options_ref{'allow-in-use'} ne "yes") {
+ $can_alloc = check_device($backing_store,$data_key_ref);
+ }
- # Needed if the config file has mixed definitions
- if (ref($backing_store) eq "HASH") {
- foreach my $backing_store (sort keys %$value) {
- add_backing_direct($backing_store,$target_options_ref,$lun,$direct_store);
- $lun += 1;
- }
- return $lun;
- } elsif (-e $backing_store && $can_alloc == 1) {
+ if (-e $backing_store && ! -d $backing_store && $can_alloc == 1) {
my @exec_commands;
my $device_type;
+ my %luns;
+ my @added_luns;
+ # Find out LUNs which are "reserved" in the config file
+ if (ref $value eq "HASH") {
+ if (length $$data_key_ref{$backing_store}{'lun'}) {
+ $lun = $$data_key_ref{$backing_store}{'lun'};
+ } else {
+ # Find an available lun if it wasn't specified
+ $lun = find_next_lun($backing_store,$data_key_ref);
+ }
+ }
# Process parameters for each lun / backing store
if (ref $value eq "HASH") {
my %params_added;
@@ -447,6 +510,11 @@ sub add_backing_direct {
check_if_hash_array($$target_options_ref{"device-type"}, "device-type");
$device_type = $$target_options_ref{"device-type"};
}
+ # lun
+ if (length $$target_options_ref{"lun"}) {
+ check_if_hash_array($$target_options_ref{"lun"}, "lun");
+ $lun = $$target_options_ref{"lun"};
+ }
} else {
print "If you got here, this means your config file is not supported.\n";
print "Please report it to stgt mailing list and attach your config files.\n";
@@ -461,7 +529,9 @@ sub add_backing_direct {
$lun += 1;
return $lun;
} elsif ($can_alloc == 0) {
- execute("# Skipping device $backing_store ($dev is mounted / in use)");
+ execute("# Skipping device $backing_store - it is in use.");
+ execute("# You can override it with --force or 'allow-in-use yes' config option.");
+ execute("# Note - do so only if you know what you're doing, you may damage your data.");
} else {
execute("# Skipping device: $backing_store");
execute("# $backing_store does not exist - please check the configuration file");
@@ -471,11 +541,12 @@ sub add_backing_direct {
# Process options from the config file
sub process_options {
my $target_options_ref = $_[0];
+ my $data_key_ref = $_[1];
my $driver = $$target_options_ref{"driver"};
if ($option eq "backing-store" || $option eq "direct-store") {
my $direct_store = 0;
if ($option eq "direct-store") {
- check_exe("sg_inq", "direct-store");
+ check_exe("sg_inq", "option direct-store");
$direct_store = 1;
}
@@ -495,7 +566,13 @@ sub process_options {
if (ref($value) eq "HASH") {
foreach my $backing_store (sort keys %$value) {
- $lun = add_backing_direct($backing_store,$target_options_ref,$lun,$direct_store);
+ if ($backing_store =~ m/HASH/) {
+ print "\nYour config file is not supported. See targets.conf.example for details.\n";
+ exit 1;
+ }
+ }
+ foreach my $backing_store (sort keys %$value) {
+ add_backing_direct($backing_store,$target_options_ref,$data_key_ref,$direct_store);
}
}
}
@@ -569,7 +646,7 @@ sub dump_config {
my @all_targets = keys %tgtadm_output_tid;
- # If all targets use the same driver, us it only once in the config
+ # If all targets use the same driver, use it only once in the config
my $skip_driver = 0;
my @drivers_combined;
foreach my $current_target (@all_targets) {
@@ -976,63 +1053,43 @@ sub check_connected {
}
# Check if a device can be allocated
-my @rootfs_dev;
+# Device can be used "by system" (i.e. mounted, used as swap, as a part of
+# a RAID array etc.) or "by user" - i.e., already by tgtd, or someone doing:
+# dd if=/dev/1st_device of=/dev/2nd_device
+# Using a device more than once could corrupt it, so return 0 when it looks
+# busy and 1 when it may be used (always 1 when allow-in-use is "yes").
sub check_device {
- my $tmp_dev = $_[0];
-
- # Check if force flag is set
- if ( $force == 0) {
- # Check for rootfs devices
- &find_rootfs_device();
- $tmp_dev =~ s/\d//g;
- # Check if device is on the same disk as rootfs
- if (grep {$_ eq $tmp_dev} @rootfs_dev) {
- return (0,$tmp_dev);
- }
- }
- return 1;
-}
-
-# finds all the devices that rootfs is mounted on
-sub find_rootfs_device {
- my @files=("/etc/mtab","/proc/mounts");
- my @lines;
- # read files
- foreach my $file (@files){
- if (open(FH,"$file")) {
- @lines=(@lines,<FH>);
- close (FH);
- }
- }
+ my $backing_store = $_[0]; # path of the device to test
+ my $data_key_ref = $_[1]; # per-device options built by make_key()
- # parse files and finds all the device which mounted on /
- foreach my $line (@lines){
- chomp $line;
- if (($line=~/^\/dev\//) && ($line=~/ \/ /)){
- my @ln=split(' ',$line);
- $ln[0]=~s/\d//g;
- push(@rootfs_dev,$ln[0]);
- }
+ # If allow-in-use is "yes", there is no need to do
+ # further tests
+ if ($$data_key_ref{$backing_store}{'allow-in-use'} eq "yes") {
+ return 1;
}
- # read swap file
- my $swap_file="/proc/swap";
- if (open(FH,"$swap_file")) {
- @lines=<FH>;
- close (FH);
+ # Check if the system uses this device ($! is only valid after a failure)
+ use Fcntl qw(O_RDONLY O_EXCL);
+ use Errno;
+ my $opened = sysopen(FH, $backing_store, O_RDONLY | O_EXCL);
+ if (!$opened && $!{EBUSY}) {
+ execute("# Device $backing_store is used by the system (mounted, used by swap?).");
+ return 0;
}
- # parse swap file and finds all the swap devices
- foreach my $line (@lines){
- chomp $line;
- if ($line=~/^\/dev\//) {
- my @ln=split(' ',$line);
- $ln[0]=~s/\d//g;
- push(@rootfs_dev,$ln[0]);
+ close(FH) if $opened;
+
+ # Check if userspace uses this device (POSIX redirection, not bash-only '&>')
+ my $lsof_check = check_exe("lsof");
+ if ($lsof_check ne 1) {
+ system("lsof $backing_store >/dev/null 2>&1");
+ my $exit_value = $? >> 8;
+ if ($exit_value == 0) {
+ execute("# Device $backing_store is used (already tgtd target?).");
+ execute("# Run 'lsof $backing_store' to see the details.");
+ return 0;
}
}
- # remove duplicate entries from @rootfs_dev
- my %seen = ();
- @rootfs_dev = grep { ! $seen{ $_ }++ } @rootfs_dev;
+ return 1;
}
# Execute or just print (or both) everything we start or would start
diff --git a/usr/Makefile b/usr/Makefile
index 82ddf07..a59364b 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -58,7 +58,7 @@ PROGRAMS += tgtd tgtadm
SCRIPTS += ../scripts/tgt-setup-lun ../scripts/tgt-admin
TGTD_OBJS += tgtd.o mgmt.o target.o scsi.o log.o driver.o util.o work.o \
parser.o spc.o sbc.o mmc.o osd.o scc.o smc.o ssc.o bs_ssc.o \
- bs.o
+ bs_null.o bs.o
MANPAGES = ../doc/manpages/tgtadm.8 ../doc/manpages/tgt-admin.8 \
../doc/manpages/tgt-setup-lun.8
diff --git a/usr/be_byteshift.h b/usr/be_byteshift.h
index 5c6a619..82b7da6 100644
--- a/usr/be_byteshift.h
+++ b/usr/be_byteshift.h
@@ -40,6 +40,11 @@ static inline uint16_t get_unaligned_be16(const void *p)
return __get_unaligned_be16((const uint8_t *)p);
}
+/* Read a 24-bit big-endian value from the three bytes at p. */
+static inline uint32_t get_unaligned_be24(const uint8_t *p)
+{
+	/* assemble the bytes, most significant first */
+	uint32_t val = (uint32_t)p[0] << 16;
+
+	val |= (uint32_t)p[1] << 8;
+	val |= p[2];
+	return val;
+}
+
static inline uint32_t get_unaligned_be32(const void *p)
{
return __get_unaligned_be32((const uint8_t *)p);
@@ -55,6 +60,13 @@ static inline void put_unaligned_be16(uint16_t val, void *p)
__put_unaligned_be16(val, p);
}
+/* Store the low 24 bits of val at p as three big-endian bytes. */
+static inline void put_unaligned_be24(uint32_t val, void *p)
+{
+	uint8_t *out = p;
+
+	out[0] = (uint8_t)(val >> 16);
+	out[1] = (uint8_t)(val >> 8);
+	out[2] = (uint8_t)val;
+}
+
static inline void put_unaligned_be32(uint32_t val, void *p)
{
__put_unaligned_be32(val, p);
diff --git a/usr/bs.c b/usr/bs.c
index cef7b19..542ef55 100644
--- a/usr/bs.c
+++ b/usr/bs.c
@@ -173,7 +173,8 @@ static void *bs_thread_worker_fn(void *arg)
return NULL;
}
-int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn)
+int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn,
+ int nr_threads)
{
int i, ret;
@@ -205,12 +206,18 @@ int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn)
if (ret)
goto event_del;
- for (i = 0; i < ARRAY_SIZE(info->worker_thread); i++) {
+ if (nr_threads > ARRAY_SIZE(info->worker_thread)) {
+ eprintf("too many threads %d\n", nr_threads);
+ nr_threads = ARRAY_SIZE(info->worker_thread);
+ }
+
+ for (i = 0; i < nr_threads; i++) {
ret = pthread_create(&info->worker_thread[i], NULL,
bs_thread_worker_fn, info);
if (ret)
goto destroy_threads;
}
+
rewrite:
ret = write(info->command_fd[1], &ret, sizeof(ret));
if (ret < 0) {
@@ -261,7 +268,8 @@ void bs_thread_close(struct bs_thread_info *info)
info->stop = 1;
pthread_cond_broadcast(&info->pending_cond);
- for (i = 0; i < ARRAY_SIZE(info->worker_thread); i++)
+ for (i = 0; i < ARRAY_SIZE(info->worker_thread) &&
+ info->worker_thread[i]; i++) /* bounds test first: never read past the array */
pthread_join(info->worker_thread[i], NULL);
pthread_cond_destroy(&info->finished_cond);
diff --git a/usr/bs_mmap.c b/usr/bs_mmap.c
index fff19d3..bb24f5e 100644
--- a/usr/bs_mmap.c
+++ b/usr/bs_mmap.c
@@ -96,7 +96,7 @@ static void bs_mmap_close(struct scsi_lu *lu)
static int bs_mmap_init(struct scsi_lu *lu)
{
struct bs_thread_info *info = BS_THREAD_I(lu);
- return bs_thread_open(info, bs_mmap_request);
+ /* request NR_WORKER_THREADS workers; bs_thread_open() caps the count */
+ return bs_thread_open(info, bs_mmap_request, NR_WORKER_THREADS);
}
static void bs_mmap_exit(struct scsi_lu *lu)
diff --git a/usr/bs_null.c b/usr/bs_null.c
new file mode 100644
index 0000000..00137ff
--- /dev/null
+++ b/usr/bs_null.c
@@ -0,0 +1,68 @@
+/*
+ * NULL I/O backing store routine
+ *
+ * Copyright (C) 2008 Alexander Nezhinsky <nezhinsky@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "list.h"
+#include "tgtd.h"
+#include "scsi.h"
+
+/* size reported by bs_null_open(): 1ULL << 40 (presumably bytes - confirm) */
+#define NULL_BS_DEV_SIZE (1ULL << 40)
+
+/* Complete every command immediately with SAM_STAT_GOOD; no data is moved. */
+int bs_null_cmd_submit(struct scsi_cmd *cmd)
+{
+ scsi_set_result(cmd, SAM_STAT_GOOD);
+ return 0;
+}
+
+/*
+ * "Open" the null store: lu, path and fd are not used, only *size is
+ * filled in with the fixed NULL_BS_DEV_SIZE. Always returns 0.
+ */
+static int bs_null_open(struct scsi_lu *lu, char *path,
+ int *fd, uint64_t *size)
+{
+ *size = NULL_BS_DEV_SIZE;
+ dprintf("NULL backing store open, size: %" PRIu64 "\n", *size);
+ return 0;
+}
+
+/* Nothing was opened, so there is nothing to release. */
+static void bs_null_close(struct scsi_lu *lu)
+{
+}
+
+/* No per-command state to clean up; always returns 0. */
+static int bs_null_cmd_done(struct scsi_cmd *cmd)
+{
+ return 0;
+}
+
+/* Template that exposes the callbacks above under the name "null". */
+static struct backingstore_template null_bst = {
+ .bs_name = "null",
+ .bs_datasize = 0,
+ .bs_open = bs_null_open,
+ .bs_close = bs_null_close,
+ .bs_cmd_submit = bs_null_cmd_submit,
+ .bs_cmd_done = bs_null_cmd_done,
+};
+
+/* Constructor attribute: registers the template before main() runs. */
+__attribute__((constructor)) static void bs_null_constructor(void)
+{
+ register_backingstore_template(&null_bst);
+}
diff --git a/usr/bs_rdwr.c b/usr/bs_rdwr.c
index e2ece4a..65a6136 100644
--- a/usr/bs_rdwr.c
+++ b/usr/bs_rdwr.c
@@ -147,7 +147,7 @@ static int bs_rdwr_init(struct scsi_lu *lu)
{
struct bs_thread_info *info = BS_THREAD_I(lu);
- return bs_thread_open(info, bs_rdwr_request);
+ return bs_thread_open(info, bs_rdwr_request, NR_WORKER_THREADS);
}
static void bs_rdwr_exit(struct scsi_lu *lu)
diff --git a/usr/bs_ssc.c b/usr/bs_ssc.c
index dcc3e30..b2e8818 100644
--- a/usr/bs_ssc.c
+++ b/usr/bs_ssc.c
@@ -208,7 +208,7 @@ static void bs_ssc_close(struct scsi_lu *lu)
static int bs_ssc_init(struct scsi_lu *lu)
{
struct bs_thread_info *info = BS_THREAD_I(lu);
- return bs_thread_open(info, ssc_rdwr_request);
+ /* single worker thread; presumably to keep requests in order - confirm */
+ return bs_thread_open(info, ssc_rdwr_request, 1);
}
static void bs_ssc_exit(struct scsi_lu *lu)
diff --git a/usr/bs_thread.h b/usr/bs_thread.h
index b97861c..b2975a5 100644
--- a/usr/bs_thread.h
+++ b/usr/bs_thread.h
@@ -33,7 +33,8 @@ static inline struct bs_thread_info *BS_THREAD_I(struct scsi_lu *lu)
return (struct bs_thread_info *) ((char *)lu + sizeof(*lu));
}
-extern int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn);
+extern int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn,
+ int nr_threads);
extern void bs_thread_close(struct bs_thread_info *info);
extern int bs_thread_cmd_submit(struct scsi_cmd *cmd);
diff --git a/usr/iscsi/iscsi_rdma.c b/usr/iscsi/iscsi_rdma.c
index 46e6ea8..d3b5147 100644
--- a/usr/iscsi/iscsi_rdma.c
+++ b/usr/iscsi/iscsi_rdma.c
@@ -144,6 +144,8 @@ struct conn_info {
/* but count so we can drain CQ on close */
int recvl_posted;
+ struct tgt_event tx_sched;
+
/* login phase resources, freed at full-feature */
void *srbuf_login;
void *listbuf_login;
@@ -194,6 +196,8 @@ struct iser_device {
void *mempool_listbuf;
struct ibv_mr *mempool_mr;
+ struct tgt_event poll_sched;
+
/* free and allocated mempool entries */
struct list_head mempool_free, mempool_alloc;
};
@@ -217,10 +221,6 @@ static struct list_head iser_conn_list;
/* if any task needs an rdma read or write slot to proceed */
static int waiting_rdma_slot;
-/* progress available, used with tgt_counter_event */
-static int num_tx_ready;
-static int num_rx_ready;
-
#define uint64_from_ptr(p) (uint64_t)(uintptr_t)(p)
#define ptr_from_int64(p) (void *)(unsigned long)(p)
@@ -251,6 +251,9 @@ static int num_rx_ready;
#define RDMA_PER_CONN 20
#define RDMA_TRANSFER_SIZE (512 * 1024)
+
+#define MAX_POLL_WC 8
+
/*
* Number of allocatable data buffers, each of this size. Do at least 128
* for linux iser. The mempool size is rounded up at initialization time
@@ -270,13 +273,17 @@ static inline struct conn_info *RDMA_CONN(struct iscsi_connection *conn)
return container_of(conn, struct conn_info, iscsi_conn);
}
-static void iser_cqe_handler(int fd, int events, void *data);
-static void iser_rx_progress(int *counter, void *data);
+static void iser_cqe_handler(int fd __attribute__((unused)),
+ int events __attribute__((unused)),
+ void *data);
static void iser_rdma_read_completion(struct rdmalist *rdma);
static void iscsi_rdma_release(struct iscsi_connection *conn);
static int iscsi_rdma_show(struct iscsi_connection *conn, char *buf,
int rest);
static void iscsi_rdma_event_modify(struct iscsi_connection *conn, int events);
+static void iser_sched_poll_cq(struct tgt_event *tev);
+static void iser_sched_consume_cq(struct tgt_event *tev);
+static void iser_sched_tx(struct tgt_event *evt);
/*
* Called when ready for full feature, builds resources.
@@ -612,6 +619,8 @@ static int iser_device_init(struct iser_device *dev)
goto out;
}
+ tgt_init_sched_event(&dev->poll_sched, iser_sched_poll_cq, dev);
+
ret = ibv_req_notify_cq(dev->cq, 0);
if (ret) {
eprintf("ibv_req_notify failed: %s\n", strerror(ret));
@@ -691,6 +700,9 @@ static void iser_accept_connection(struct rdma_cm_event *event)
ci->login_phase = LOGIN_PHASE_START;
INIT_LIST_HEAD(&ci->conn_tx_ready);
list_add(&ci->iser_conn_list, &temp_conn);
+
+ tgt_init_sched_event(&ci->tx_sched, iser_sched_tx, ci);
+
/* initiator sits at dst, we are src */
memcpy(&ci->peer_addr, &event->id->route.addr.dst_addr,
sizeof(ci->peer_addr));
@@ -940,7 +952,7 @@ static void handle_wc(struct ibv_wc *wc)
list_add(&rdmal->list, &ci->rdmal);
if (waiting_rdma_slot) {
waiting_rdma_slot = 0;
- num_tx_ready = 1;
+ tgt_add_sched_event(&ci->tx_sched);
}
break;
@@ -957,7 +969,7 @@ static void handle_wc(struct ibv_wc *wc)
list_add(&rdmal->list, &ci->rdmal);
if (waiting_rdma_slot) {
waiting_rdma_slot = 0;
- num_tx_ready = 1;
+ tgt_add_sched_event(&ci->tx_sched);
}
break;
@@ -974,85 +986,14 @@ close_err:
}
/*
- * Called directly from main event loop when a CQ notification is
- * available.
- */
-static void iser_cqe_handler(int fd __attribute__((unused)),
- int events __attribute__((unused)),
- void *data)
-{
- int ret;
- void *cq_context;
- struct iser_device *dev = data;
-
- ret = ibv_get_cq_event(dev->cq_channel, &dev->cq, &cq_context);
- if (ret != 0) {
- eprintf("notification, but no CQ event\n");
- exit(1);
- }
-
- ibv_ack_cq_events(dev->cq, 1);
-
- ret = ibv_req_notify_cq(dev->cq, 0);
- if (ret) {
- eprintf("ibv_req_notify_cq: %s\n", strerror(ret));
- exit(1);
- }
-
- iser_rx_progress(NULL, dev);
-}
-
-/*
- * Called from tgtd when num_tx_ready (counter) non-zero. Walks the
- * list of active connections and tries to push tx on each, until nothing
- * is ready anymore. No progress limit here.
- */
-static void iser_tx_progress(int *counter __attribute__((unused)),
- void *data __attribute__((unused)))
-{
- int reloop, ret;
- struct conn_info *ci, *cin;
- struct iscsi_connection *conn;
-
- dprintf("entry\n");
- num_tx_ready = 0;
-
- do {
- reloop = 0;
- list_for_each_entry_safe(ci, cin, &conn_tx_ready,
- conn_tx_ready) {
- conn = &ci->iscsi_conn;
- if (conn->state == STATE_CLOSE) {
- dprintf("ignoring tx for closed conn\n");
- } else {
- dprintf("trying tx\n");
- ret = iscsi_tx_handler(conn);
- if (conn->state == STATE_CLOSE) {
- conn_close(conn);
- dprintf("connection %p closed\n", ci);
- } else {
- if (ret == 0) {
- reloop = 1;
- } else {
- /* but leave on tx ready list */
- waiting_rdma_slot = 1;
- }
- }
- }
- }
- } while (reloop);
-}
-
-/*
* Could read as many entries as possible without blocking, but
* that just fills up a list of tasks. Instead pop out of here
* so that tx progress, like issuing rdma reads and writes, can
* happen periodically.
*/
-#define MAX_RX_PROGRESS 8
-static void iser_rx_progress_one(struct iser_device *dev)
+static int iser_poll_cq(struct iser_device *dev, int max_wc)
{
- int ret, numwc = 0;
+ int ret = 0, numwc = 0;
struct ibv_wc wc;
struct conn_info *ci;
struct recvlist *recvl;
@@ -1069,8 +1010,8 @@ static void iser_rx_progress_one(struct iser_device *dev)
VALGRIND_MAKE_MEM_DEFINED(&wc, sizeof(wc));
if (wc.status == IBV_WC_SUCCESS) {
handle_wc(&wc);
- if (++numwc == MAX_RX_PROGRESS) {
- num_rx_ready = 1;
+ if (++numwc == max_wc) {
+ ret = 1;
break;
}
} else if (wc.status == IBV_WC_WR_FLUSH_ERR) {
@@ -1089,23 +1030,114 @@ static void iser_rx_progress_one(struct iser_device *dev)
wc.status, (unsigned long long) wc.wr_id);
}
}
+ return ret;
+}
+
+/*
+ * Poll at most MAX_POLL_WC completions from dev's cq, then schedule the
+ * follow-up work on dev->poll_sched:
+ * - iser_poll_cq() returned 0: notifications are re-armed and one more,
+ * non-arming pass (iser_sched_consume_cq) is scheduled;
+ * - otherwise: another arming poll (iser_sched_poll_cq) is scheduled.
+ * Exits the process if re-arming the notification fails.
+ */
+static void iser_poll_cq_armable(struct iser_device *dev)
+{
+ int ret;
+
+ ret = iser_poll_cq(dev, MAX_POLL_WC);
+ if (ret < 0)
+ exit(1);
+
+ if (ret == 0) {
+ /* no more completions on cq, arm the completion interrupts */
+ ret = ibv_req_notify_cq(dev->cq, 0);
+ if (ret) {
+ eprintf("ibv_req_notify_cq: %s\n", strerror(ret));
+ exit(1);
+ }
+ /* next pass only drains what slipped in before the re-arm */
+ dev->poll_sched.sched_handler = iser_sched_consume_cq;
+ } else
+ dev->poll_sched.sched_handler = iser_sched_poll_cq;
+
+ tgt_add_sched_event(&dev->poll_sched);
+}
+
+/* Scheduled handler: polls the cq after a completion event has been
+ received and acknowledged. If no more completions are found,
+ the interrupts are re-armed (see iser_poll_cq_armable). */
+static void iser_sched_poll_cq(struct tgt_event *tev)
+{
+ struct iser_device *dev = tev->data;
+ iser_poll_cq_armable(dev);
+}
+
+/* Scheduled handler: consumes completions that may have arrived
+ after the cq was seen empty but just before
+ the notification interrupts were re-armed.
+ It is intended to consume those remaining completions only;
+ this function does not re-arm interrupts. */
+static void iser_sched_consume_cq(struct tgt_event *tev)
+{
+ struct iser_device *dev = tev->data;
+ int ret;
+
+ ret = iser_poll_cq(dev, MAX_POLL_WC);
+ if (ret < 0)
+ exit(1);
+}
+
+/*
+ * Called directly from main event loop when a CQ notification is
+ * available. Fetches and acknowledges the CQ event, then polls the cq.
+ */
+static void iser_cqe_handler(int fd __attribute__((unused)),
+ int events __attribute__((unused)),
+ void *data)
+{
+ struct iser_device *dev = data;
+ void *cq_context;
+ int ret;
+
+ ret = ibv_get_cq_event(dev->cq_channel, &dev->cq, &cq_context);
+ if (ret != 0) {
+ eprintf("notification, but no CQ event\n");
+ exit(1);
+ }
+
+ ibv_ack_cq_events(dev->cq, 1);
+
+ /* if a poll was previously scheduled, remove it,
+ as it will be scheduled again when necessary */
+ if (dev->poll_sched.scheduled)
+ tgt_remove_sched_event(&dev->poll_sched);
+
+ /* poll now; this also schedules the follow-up pass */
+ iser_poll_cq_armable(dev);
}
/*
- * Only one progress counter, must look across all devs.
+ * Called from tgtd as a scheduled event (evt->data is the conn_info).
+ * Tries to push tx on that one connection until the connection is
+ * closed or iscsi_tx_handler() returns non-zero. No progress limit here.
*/
-static void iser_rx_progress(int *counter __attribute__((unused)), void *data)
+static void iser_sched_tx(struct tgt_event *evt)
{
- struct iser_device *dev;
+ struct conn_info *ci = evt->data;
+ struct iscsi_connection *conn = &ci->iscsi_conn;
+ int ret;
dprintf("entry\n");
- num_rx_ready = 0;
- if (data == NULL) {
- list_for_each_entry(dev, &iser_dev_list, list)
- iser_rx_progress_one(dev);
- } else {
- dev = data;
- iser_rx_progress_one(dev);
+
+ if (conn->state == STATE_CLOSE) {
+ dprintf("ignoring tx for closed conn\n");
+ return;
+ }
+
+ for (;;) {
+ dprintf("trying tx\n");
+ ret = iscsi_tx_handler(conn);
+ /* the handler may have moved the connection to STATE_CLOSE */
+ if (conn->state == STATE_CLOSE) {
+ conn_close(conn);
+ dprintf("connection %p closed\n", ci);
+ break;
+ }
+ if (ret != 0) {
+ /* but leave on tx ready list; handle_wc() reschedules
+ this event when an rdma slot completes */
+ waiting_rdma_slot = 1;
+ break;
+ }
}
}
@@ -1165,10 +1197,7 @@ static int iscsi_rdma_init(void)
INIT_LIST_HEAD(&iser_dev_list);
INIT_LIST_HEAD(&iser_conn_list);
INIT_LIST_HEAD(&temp_conn);
- num_tx_ready = 0;
- num_rx_ready = 0;
- ret = tgt_counter_event_add(&num_tx_ready, iser_tx_progress, NULL);
- ret = tgt_counter_event_add(&num_rx_ready, iser_rx_progress, NULL);
+
return ret;
}
@@ -1397,10 +1426,6 @@ static void iscsi_iser_write_end(struct iscsi_connection *conn)
ci->writeb = 0; /* reset count */
ci->send_comm_event = NULL;
-
- /* wake up the progress engine to do the done */
- dprintf("inc progress to finish cmd\n");
- num_tx_ready = 1;
}
/*
@@ -1505,7 +1530,7 @@ static int iscsi_rdma_rdma_write(struct iscsi_connection *conn)
iscsi_rdma_event_modify(conn, EPOLLIN);
} else {
/* poke ourselves to do the next rdma */
- num_tx_ready = 1;
+ tgt_add_sched_event(&ci->tx_sched);
}
return ret;
@@ -1628,7 +1653,7 @@ static void iscsi_rdma_event_modify(struct iscsi_connection *conn, int events)
dprintf("tx ready adding %p\n", ci);
list_add(&ci->conn_tx_ready, &conn_tx_ready);
}
- num_tx_ready = 1;
+ tgt_add_sched_event(&ci->tx_sched);
} else {
dprintf("tx ready removing %p\n", ci);
list_del_init(&ci->conn_tx_ready);
diff --git a/usr/log.c b/usr/log.c
index 076c770..056314a 100644
--- a/usr/log.c
+++ b/usr/log.c
@@ -24,6 +24,7 @@
#include <unistd.h>
#include <syslog.h>
#include <signal.h>
+#include <errno.h>
#include <sys/shm.h>
#include <sys/ipc.h>
#include <sys/types.h>
@@ -52,29 +53,39 @@ static int logarea_init (int size)
logdbg(stderr,"enter logarea_init\n");
if ((shmid = shmget(IPC_PRIVATE, sizeof(struct logarea),
- 0644 | IPC_CREAT | IPC_EXCL)) == -1)
+ 0644 | IPC_CREAT | IPC_EXCL)) == -1) {
+ syslog(LOG_ERR, "shmget logarea failed %d", errno);
return 1;
+ }
la = shmat(shmid, NULL, 0);
- if (!la)
+ if (!la) {
+ syslog(LOG_ERR, "shmat logarea failed %d", errno);
return 1;
+ }
+
+ shmctl(shmid, IPC_RMID, NULL);
if (size < MAX_MSG_SIZE)
size = LOG_SPACE_SIZE;
if ((shmid = shmget(IPC_PRIVATE, size,
0644 | IPC_CREAT | IPC_EXCL)) == -1) {
+ syslog(LOG_ERR, "shmget msg failed %d", errno);
shmdt(la);
return 1;
}
la->start = shmat(shmid, NULL, 0);
if (!la->start) {
+ syslog(LOG_ERR, "shmat msg failed %d", errno);
shmdt(la);
return 1;
}
memset(la->start, 0, size);
+ shmctl(shmid, IPC_RMID, NULL);
+
la->empty = 1;
la->end = la->start + size;
la->head = la->start;
@@ -82,18 +93,23 @@ static int logarea_init (int size)
if ((shmid = shmget(IPC_PRIVATE, MAX_MSG_SIZE + sizeof(struct logmsg),
0644 | IPC_CREAT | IPC_EXCL)) == -1) {
+ syslog(LOG_ERR, "shmget logmsg failed %d", errno);
shmdt(la->start);
shmdt(la);
return 1;
}
la->buff = shmat(shmid, NULL, 0);
if (!la->buff) {
+ syslog(LOG_ERR, "shmat logmsgfailed %d", errno);
shmdt(la->start);
shmdt(la);
return 1;
}
+ shmctl(shmid, IPC_RMID, NULL);
+
if ((la->semid = semget(SEMKEY, 1, 0666 | IPC_CREAT)) < 0) {
+ syslog(LOG_ERR, "semget failed %d", errno);
shmdt(la->buff);
shmdt(la->start);
shmdt(la);
@@ -102,6 +118,7 @@ static int logarea_init (int size)
la->semarg.val=1;
if (semctl(la->semid, 0, SETVAL, la->semarg) < 0) {
+ syslog(LOG_ERR, "semctl failed %d", errno);
shmdt(la->buff);
shmdt(la->start);
shmdt(la);
diff --git a/usr/spc.c b/usr/spc.c
index 60fd7d7..ac5c3de 100644
--- a/usr/spc.c
+++ b/usr/spc.c
@@ -383,6 +383,9 @@ int spc_mode_select(int host_no, struct scsi_cmd *cmd,
if (block_descriptor_len != BLOCK_DESCRIPTOR_LEN)
goto sense;
+ memcpy(cmd->dev->mode_block_descriptor, data + offset,
+ BLOCK_DESCRIPTOR_LEN);
+
offset += 8;
}
diff --git a/usr/tgtd.c b/usr/tgtd.c
index 0b1cb4c..62aaa04 100644
--- a/usr/tgtd.c
+++ b/usr/tgtd.c
@@ -38,26 +38,13 @@
#include "work.h"
#include "util.h"
-struct tgt_event {
- union {
- event_handler_t *handler;
- counter_event_handler_t *counter_handler;
- };
- union {
- int fd;
- int *counter;
- };
- void *data;
- struct list_head e_list;
-};
-
unsigned long pagesize, pageshift, pagemask;
int system_active = 1;
static int ep_fd;
static char program_name[] = "tgtd";
static LIST_HEAD(tgt_events_list);
-static LIST_HEAD(tgt_counter_events_list);
+static LIST_HEAD(tgt_sched_events_list);
static struct option const long_options[] =
{
@@ -136,22 +123,6 @@ int tgt_event_add(int fd, int events, event_handler_t handler, void *data)
return err;
}
-int tgt_counter_event_add(int *counter, counter_event_handler_t handler,
- void *data)
-{
- struct tgt_event *tev;
-
- tev = zalloc(sizeof(*tev));
- if (!tev)
- return -ENOMEM;
-
- tev->data = data;
- tev->counter_handler = handler;
- tev->counter = counter;
- list_add(&tev->e_list, &tgt_counter_events_list);
- return 0;
-}
-
static struct tgt_event *tgt_event_lookup(int fd)
{
struct tgt_event *tev;
@@ -163,17 +134,6 @@ static struct tgt_event *tgt_event_lookup(int fd)
return NULL;
}
-static struct tgt_event *tgt_counter_event_lookup(int *counter)
-{
- struct tgt_event *tev;
-
- list_for_each_entry(tev, &tgt_counter_events_list, e_list) {
- if (tev->counter == counter)
- return tev;
- }
- return NULL;
-}
-
void tgt_event_del(int fd)
{
struct tgt_event *tev;
@@ -189,20 +149,6 @@ void tgt_event_del(int fd)
free(tev);
}
-void tgt_counter_event_del(int *counter)
-{
- struct tgt_event *tev;
-
- tev = tgt_counter_event_lookup(counter);
- if (!tev) {
- eprintf("Cannot find counter event %p\n", counter);
- return;
- }
-
- list_del(&tev->e_list);
- free(tev);
-}
-
int tgt_event_modify(int fd, int events)
{
struct epoll_event ev;
@@ -221,26 +167,62 @@ int tgt_event_modify(int fd, int events)
return epoll_ctl(ep_fd, EPOLL_CTL_MOD, fd, &ev);
}
+void tgt_init_sched_event(struct tgt_event *evt,
+ sched_event_handler_t sched_handler, void *data)
+{
+ evt->sched_handler = sched_handler;
+ evt->scheduled = 0;
+ evt->data = data;
+ INIT_LIST_HEAD(&evt->e_list);
+}
+
+void tgt_add_sched_event(struct tgt_event *evt)
+{
+ if (!evt->scheduled) {
+ evt->scheduled = 1;
+ list_add_tail(&evt->e_list, &tgt_sched_events_list);
+ }
+}
+
+void tgt_remove_sched_event(struct tgt_event *evt)
+{
+ if (evt->scheduled) {
+ evt->scheduled = 0;
+ list_del_init(&evt->e_list);
+ }
+}
+
+static int tgt_exec_scheduled(void)
+{
+ struct list_head *last_sched;
+ struct tgt_event *tev, *tevn;
+ int work_remains = 0;
+
+ if (!list_empty(&tgt_sched_events_list)) {
+ /* execute only work scheduled till now */
+ last_sched = tgt_sched_events_list.prev;
+ list_for_each_entry_safe(tev, tevn, &tgt_sched_events_list,
+ e_list) {
+ tgt_remove_sched_event(tev);
+ tev->sched_handler(tev);
+ if (&tev->e_list == last_sched)
+ break;
+ }
+ if (!list_empty(&tgt_sched_events_list))
+ work_remains = 1;
+ }
+ return work_remains;
+}
+
static void event_loop(void)
{
- int nevent, i, done, timeout = TGTD_TICK_PERIOD * 1000;
+ int nevent, i, sched_remains, timeout;
struct epoll_event events[1024];
- struct tgt_event *tev, *tevn;
+ struct tgt_event *tev;
retry:
- /*
- * Check the counter events to see if they have any work to run.
- */
- do {
- done = 1;
- list_for_each_entry_safe(tev, tevn, &tgt_counter_events_list,
- e_list) {
- if (*tev->counter) {
- done = 0;
- tev->counter_handler(tev->counter, tev->data);
- }
- }
- } while (!done);
+ sched_remains = tgt_exec_scheduled();
+ timeout = sched_remains ? 0 : TGTD_TICK_PERIOD * 1000;
nevent = epoll_wait(ep_fd, events, ARRAY_SIZE(events), timeout);
if (nevent < 0) {
diff --git a/usr/tgtd.h b/usr/tgtd.h
index 4febcd3..da751c8 100644
--- a/usr/tgtd.h
+++ b/usr/tgtd.h
@@ -206,13 +206,20 @@ extern int tgt_bind_host_to_target(int tid, int host_no);
extern int tgt_unbind_host_to_target(int tid, int host_no);
extern int tgt_bound_target_lookup(int host_no);
-typedef void (event_handler_t)(int fd, int events, void *data);
-typedef void (counter_event_handler_t)(int *counter, void *data);
+struct tgt_event;
+typedef void (*sched_event_handler_t)(struct tgt_event *tev);
+
+extern void tgt_init_sched_event(struct tgt_event *evt,
+ sched_event_handler_t sched_handler, void *data);
+
+typedef void (*event_handler_t)(int fd, int events, void *data);
+
extern int tgt_event_add(int fd, int events, event_handler_t handler, void *data);
-extern int tgt_counter_event_add(int *counter, counter_event_handler_t handler,
- void *data);
extern void tgt_event_del(int fd);
-extern void tgt_counter_event_del(int *counter);
+
+extern void tgt_add_sched_event(struct tgt_event *evt);
+extern void tgt_remove_sched_event(struct tgt_event *evt);
+
extern int tgt_event_modify(int fd, int events);
extern int target_cmd_queue(int tid, struct scsi_cmd *cmd);
extern void target_cmd_done(struct scsi_cmd *cmd);
@@ -262,4 +269,17 @@ extern int dtd_load_unload(int tid, uint64_t lun, int load, char *file);
extern int register_backingstore_template(struct backingstore_template *bst);
extern struct backingstore_template *get_backingstore_template(const char *name);
+struct tgt_event {
+ union {
+ event_handler_t handler;
+ sched_event_handler_t sched_handler;
+ };
+ union {
+ int fd;
+ int scheduled;
+ };
+ void *data;
+ struct list_head e_list;
+};
+
#endif