Samtools implements various utilities for post-processing alignments in the SAM, BAM, and CRAM formats, including indexing, variant calling (in conjunction with bcftools), and a simple alignment viewer. This is the legacy version, which does not use HTSlib and is required by some software. OBS-URL: https://build.opensuse.org/request/show/390866 OBS-URL: https://build.opensuse.org/package/show/science/samtools-legacy?expand=0&rev=1
187 lines
5.9 KiB
Diff
187 lines
5.9 KiB
Diff
diff -up samtools-0.1.19/faidx.c.Rfixes samtools-0.1.19/faidx.c
|
|
--- samtools-0.1.19/faidx.c.Rfixes 2015-05-29 23:25:42.646798526 -0400
|
|
+++ samtools-0.1.19/faidx.c 2015-05-29 23:28:55.628504008 -0400
|
|
@@ -94,6 +94,11 @@ faidx_t *fai_build_core(RAZF *rz)
|
|
}
|
|
name[l_name++] = c;
|
|
}
|
|
+ if (m_name < l_name + 2) { /* MTM: 0-length id */
|
|
+ m_name = l_name + 2;
|
|
+ kroundup32(m_name);
|
|
+ name = (char*)realloc(name, m_name);
|
|
+ }
|
|
name[l_name] = '\0';
|
|
if (ret == 0) {
|
|
fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
|
|
@@ -127,11 +132,24 @@ faidx_t *fai_build_core(RAZF *rz)
|
|
}
|
|
}
|
|
}
|
|
- fai_insert_index(idx, name, len, line_len, line_blen, offset);
|
|
+ if (len < 0) { /* MTM; should also check state */
|
|
+ fprintf(stderr, "[fai_build_core] no entries in file\n");
|
|
+ free(name); fai_destroy(idx);
|
|
+ return 0;
|
|
+ }
|
|
+ fai_insert_index(idx, name, len, line_len, line_blen, offset);
|
|
free(name);
|
|
return idx;
|
|
}
|
|
|
|
+// HP - Jan 13, 2014: I've no idea why the original authors of the fai_save()
|
|
+// and fai_read() functions below decided to use the (long) type instead of
|
|
+// (long long) for the sequence offsets on Windows. The problem with this is that
|
|
+// these functions then break if the FASTA file contains sequences with offsets
|
|
+// > LONG_MAX which turns out to be 2^31-1 on Windows, hence not big enough if
|
|
+// the FASTA file contains the full genome sequences for Human and other
|
|
+// mammals. So I modified fai_save() and fai_read() to always use (long long).
|
|
+
|
|
void fai_save(const faidx_t *fai, FILE *fp)
|
|
{
|
|
khint_t k;
|
|
@@ -140,11 +158,12 @@ void fai_save(const faidx_t *fai, FILE *
|
|
faidx1_t x;
|
|
k = kh_get(s, fai->hash, fai->name[i]);
|
|
x = kh_value(fai->hash, k);
|
|
-#ifdef _WIN32
|
|
- fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len);
|
|
-#else
|
|
+// HP - Jan 13, 2014: See above note.
|
|
+//#ifdef _WIN32
|
|
+// fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len);
|
|
+//#else
|
|
fprintf(fp, "%s\t%d\t%lld\t%d\t%d\n", fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len);
|
|
-#endif
|
|
+//#endif
|
|
}
|
|
}
|
|
|
|
@@ -153,22 +172,24 @@ faidx_t *fai_read(FILE *fp)
|
|
faidx_t *fai;
|
|
char *buf, *p;
|
|
int len, line_len, line_blen;
|
|
-#ifdef _WIN32
|
|
- long offset;
|
|
-#else
|
|
+// HP - Jan 13, 2014: See above note.
|
|
+//#ifdef _WIN32
|
|
+// long offset;
|
|
+//#else
|
|
long long offset;
|
|
-#endif
|
|
+//#endif
|
|
fai = (faidx_t*)calloc(1, sizeof(faidx_t));
|
|
fai->hash = kh_init(s);
|
|
buf = (char*)calloc(0x10000, 1);
|
|
while (!feof(fp) && fgets(buf, 0x10000, fp)) {
|
|
for (p = buf; *p && isgraph(*p); ++p);
|
|
*p = 0; ++p;
|
|
-#ifdef _WIN32
|
|
- sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len);
|
|
-#else
|
|
+// HP - Jan 13, 2014: See above note.
|
|
+//#ifdef _WIN32
|
|
+// sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len);
|
|
+//#else
|
|
sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len);
|
|
-#endif
|
|
+//#endif
|
|
fai_insert_index(fai, buf, len, line_len, line_blen, offset);
|
|
}
|
|
free(buf);
|
|
@@ -200,6 +221,10 @@ int fai_build(const char *fn)
|
|
return -1;
|
|
}
|
|
fai = fai_build_core(rz);
|
|
+ if (fai == NULL) { /* MTM */
|
|
+ free(str);
|
|
+ return -1;
|
|
+ }
|
|
razf_close(rz);
|
|
fp = fopen(str, "wb");
|
|
if (fp == 0) {
|
|
@@ -258,10 +283,18 @@ FILE *download_and_open(const char *fn)
|
|
faidx_t *fai_load(const char *fn)
|
|
{
|
|
char *str;
|
|
- FILE *fp;
|
|
faidx_t *fai;
|
|
str = (char*)calloc(strlen(fn) + 5, 1);
|
|
sprintf(str, "%s.fai", fn);
|
|
+ fai = fai_load0(fn, str);
|
|
+ free(str);
|
|
+ return fai;
|
|
+}
|
|
+
|
|
+faidx_t *fai_load0(const char *fn, const char *str)
|
|
+{
|
|
+ FILE *fp;
|
|
+ faidx_t *fai;
|
|
|
|
#ifdef _USE_KNETFILE
|
|
if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)
|
|
@@ -270,7 +303,6 @@ faidx_t *fai_load(const char *fn)
|
|
if ( !fp )
|
|
{
|
|
fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str);
|
|
- free(str);
|
|
return 0;
|
|
}
|
|
}
|
|
@@ -283,7 +315,6 @@ faidx_t *fai_load(const char *fn)
|
|
fp = fopen(str, "rb");
|
|
if (fp == 0) {
|
|
fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
|
|
- free(str);
|
|
return 0;
|
|
}
|
|
}
|
|
@@ -292,7 +323,6 @@ faidx_t *fai_load(const char *fn)
|
|
fclose(fp);
|
|
|
|
fai->rz = razf_open(fn, "rb");
|
|
- free(str);
|
|
if (fai->rz == 0) {
|
|
fprintf(stderr, "[fai_load] fail to open FASTA file.\n");
|
|
return 0;
|
|
diff -up samtools-0.1.19/faidx.h.Rfixes samtools-0.1.19/faidx.h
|
|
--- samtools-0.1.19/faidx.h.Rfixes 2013-03-19 03:48:09.000000000 -0400
|
|
+++ samtools-0.1.19/faidx.h 2015-05-29 23:29:33.916247174 -0400
|
|
@@ -64,6 +64,13 @@ extern "C" {
|
|
faidx_t *fai_load(const char *fn);
|
|
|
|
/*!
|
|
+ @abstract Load the FASTA index from the file named by "index".
|
|
+ @param fn File name of the FASTA file
|
|
+ @param index File name of the FASTA index
|
|
+ */
|
|
+ faidx_t *fai_load0(const char *fn, const char *index);
|
|
+
|
|
+ /*!
|
|
@abstract Fetch the sequence in a region.
|
|
@param fai Pointer to the faidx_t struct
|
|
@param reg Region in the format "chr2:20,000-30,000"
|
|
@@ -96,6 +103,22 @@ extern "C" {
|
|
*/
|
|
char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len);
|
|
|
|
+ /*!
|
|
+ @abstract Alternative to faidx_fetch_seq().
|
|
+ @param fai Pointer to the faidx_t struct
|
|
+ @param c_name Region name
|
|
+ @param p_beg_i Beginning position number (zero-based)
|
|
+ @param p_end_i End position number (zero-based)
|
|
+ @param out User-supplied output buffer
|
|
+ @return Number of bytes written; -1 on failure
|
|
+
|
|
+ @discussion Differences with faidx_fetch_seq(): (1) writes the
|
|
+ incoming sequence to a user-supplied output buffer, (2) doesn't write
|
|
+ the terminating null byte ('\0'), (3) properly handles 0-length
|
|
+ sequences, (4) returns the number of bytes written; -1 on failure.
|
|
+ */
|
|
+ int faidx_fetch_seq2(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, char *out);
|
|
+
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|