Files
xtandem/plibrary.pl
Archie Cobbs 69accb2a62 - Add xtandem2mgf utility based on plibrary.pl from web site
- Remove database files as they are large and quickly obsolete
- added patches:
  * plibrary.patch

OBS-URL: https://build.opensuse.org/package/show/science/xtandem?expand=0&rev=12
2013-12-17 18:50:07 +00:00

279 lines
5.8 KiB
Perl

#!c:/perl/bin/perl.exe
##
##
## plibrary.pl
## Copyright (C) 2009 Ronald C Beavis, all rights reserved
## The Global Proteome Machine
## This software is a component of the X! proteomics software
## development project
##
## Use of this software governed by the Artistic license,
## as reproduced at http://www.opensource.org/licenses/artistic-license.php
##
## plibrary accepts a GPM data file as an input and outputs an X! Hunter
## compatible MGF annotated spectrum library.
##
## Version 2009.07.08 - first version
##
##
use strict;
use CGI qw(:all);
use CGI::Carp qw(fatalsToBrowser);
use HTTP::Request::Common qw(POST GET);
use LWP::UserAgent;
my $cgi = CGI->new();
require "./defines.pl";
require "./common.pl";
my $file_version = "plibrary.pl, v. 2009.08.18";
my $gvalue = GetGsite('gpmdb_url');
my $wvalue = GetGsite('wiki_url');
my $mvalue= GetGsite('mrm_url');
my $path = get_root() . $cgi->param('path');
my $url = $cgi->param('path');
my $label;
my @taxa;
if(not open(INPUT,"<$path")) {
if(open(INPUT,"<$path.gz")) {
close(INPUT);
system("gzip -d $path.gz");
open(INPUT,"<$path");
}
}
while(<INPUT>) {
if(/\<bioml/) {
if(/label=\"/) {
s/.*label=\"(.*?)\".*/$1/i;
$label = $_;
}
close(INPUT);
}
}
close(INPUT);
$label =~ s/\\/-/g;
$label =~ s/:/·/g;
$label =~ s/\//-/g;
$label =~ s/·\-/·/g;
if(length($label) == 0) {
$label = "main model listing";
}
my $gpm = $path;
if($gpm =~ /GPM[A-Z]*[0-9]{11}\./i) {
$gpm =~ s/.*(GPM[A-Z]*[0-9]{11})\..*/$1/i;
}
else {
($gpm) = $gpm =~ /.+\/(.+?)\..*?/;
}
open(INPUT,"<$path") or die "$path not found";
my @libraries;
my @expects;
my @indices;
my %index;
while(<INPUT>) {
if(/\<group/ and /type\=\"model\"/) {
my @group;
chomp($_);
push(@group,$_);
$_ = <INPUT>;
while($_ and not /\<\/group\>\<\/group\>/) {
chomp($_);
push(@group,$_);
$_ = <INPUT>;
}
my ($l,$e,$i) = ProcessGroup(\@group);
if($index{$i}) {
if($e < $index{$i}) {
$index{$i} = $e;
}
}
else {
$index{$i} = $e;
}
push(@libraries,$l);
push(@indices,$i);
push(@expects,$e);
}
}
close(INPUT);
my $libsize = 0;
my $x = 0;
while($x < scalar(@expects)) {
if(1 or $index{@indices[$x]} == @expects[$x]) {
$libsize++;
}
$x++;
}
PrintHead($gpm,$libsize);
$x = 0;
while($x < scalar(@expects)) {
if(1 or $index{@indices[$x]} == @expects[$x]) {
print @libraries[$x];
}
$x++;
}
sub GetLibrarySize
{
my ($_p) = @_;
open(IN,"<$_p");
my $size = 0;
while(<IN>) {
if(/\<group/ and /type\=\"model\"/) {
$size++;
}
}
return $size;
}
sub PrintHead
{
my ($_h,$_l) = @_;
print qq(Content-type: text/plain\nContent-disposition: attachment; filename=$_h.mgf\n\n);
# print qq(Content-type: text/plain\n\n<pre>);
print qq(SEARCH=MIS\r\nREPTYPE=Peptide\r\nLIBSIZE=$_l\r\n## created by: $file_version\r\n);
print qq(## original data set: $gpm\r\n);
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
printf "## creation date: %4d.%02d.%02d %02d:%02d:%02d\r\n",$year+1900,$mon+1,$mday,$hour,$min,$sec;
}
sub ProcessGroup
{
my ($_g) = @_;
my $label;
my @starts;
my @expects;
my @sequences;
my @mods;
my $g;
my @labels;
my $spec = 0;
my $x = 0;
my $length = scalar(@$_g);
my $mz;
my $z;
my @xs;
my @ys;
while($x < $length) {
$g = @$_g[$x];
$x++;
if($x == 1) {
($z) = $g =~ /z\=\"(.+?)\"/;
}
elsif($g =~ /\<protein/) {
($label) = $g =~ /label\=\"(.+?)\"/;
}
elsif($g =~ /\<domain/) {
my ($s) = $g =~ /start\=\"(.+?)\"/;
my $st = $s;
push(@starts,$s);
($s) = $g =~ /expect\=\"(.+?)\"/;
push(@expects,$s);
($s) = $g =~ /seq\=\"(.+?)\"/;
push(@sequences,$s);
push(@labels,"$label\@$st");
($mz) = $g =~ /mh\=\"(.+?)\"/;
$mz = ($mz - 1.007276)/$z + 1.007276;
$mz = sprintf("%.4f",$mz);
my @m = ();
while(not $g =~ /\<\/domain/) {
$g = @$_g[$x];
$x++;
if($g =~ /\<aa/) {
my $t;
($t) = $g =~ /modified\=\"(.+?)\"/;
($s) = $g =~ /at\=\"(.+?)\"/;
$s = $s - $st + 1;
$t = "$t\@$s";
if($g =~ /pm\=/) {
($s) = $g =~ /pm\=\"(.*?)\"/;
if($s) {
$t .= ",$s";
($s) = $g =~ /id\=\"(.*?)\"/;
if($s) {
$t .= ",$s";
}
}
}
push(@m,$t);
}
}
push(@mods,\@m);
}
elsif($g =~ /type\=\"support\" label\=\"fragment ion mass spectrum\"/) {
while($x < $length) {
$g = @$_g[$x];
$x++;
if($g =~ /\<GAML\:Xdata/) {
$x++;
$g = @$_g[$x];
while(not $g =~ /\<\/GAML\:values\>/) {
my @v = split / /,$g;
my $b;
foreach $b(@v) {
push(@xs,$b);
}
$x++;
$g = @$_g[$x];
}
}
if($g =~ /\<GAML\:Ydata/) {
$x++;
$g = @$_g[$x];
while(not $g =~ /\<\/GAML\:values\>/) {
my @v = split / /,$g;
my $b;
foreach $b(@v) {
push(@ys,$b);
}
$x++;
$g = @$_g[$x];
}
}
}
last;
}
}
my @rank = sort { $ys[$b] <=> $ys[$a] } 0 .. $#ys;
my @xt;
my @yt;
my $count = 0;
foreach $x(@rank) {
push(@xt,@xs[$x]);
push(@yt,@ys[$x]);
$count++;
if($count == 20) {
last;
}
}
@rank = sort { $xt[$a] <=> $xt[$b] } 0 .. $#xt;
my $out = "BEGIN IONS\r\n";
my $index = "@sequences[0]$z";
$out .= "PEPMASS=$mz\r\n";
$out .= "CHARGE=$z\r\n";
$out .= "PEPSEQ=" . @sequences[0] . "\r\n";
$out .= "PEPEXP=" . @expects[0] . "\r\n";
my $mod = @mods[0];
foreach $x(@$mod) {
$out .= "PEPMOD=$x\r\n";
$index .= $x;
}
foreach $x(@labels) {
$out .= "PEPACC=$x\r\n";
}
foreach $x(@rank) {
$out .= "@xt[$x] @yt[$x]\r\n";
}
$out .= "END IONS\r\n\r\n";
return ($out,@expects[0],$index);
}