- Remove database files as they are large and quickly obsolete - added patches: * plibrary.patch OBS-URL: https://build.opensuse.org/package/show/science/xtandem?expand=0&rev=12
279 lines
5.8 KiB
Perl
279 lines
5.8 KiB
Perl
#!c:/perl/bin/perl.exe
|
|
##
|
|
##
|
|
## plibrary.pl
|
|
## Copyright (C) 2009 Ronald C Beavis, all rights reserved
|
|
## The Global Proteome Machine
|
|
## This software is a component of the X! proteomics software
|
|
## development project
|
|
##
|
|
## Use of this software governed by the Artistic license,
|
|
## as reproduced at http://www.opensource.org/licenses/artistic-license.php
|
|
##
|
|
## plibrary accepts a GPM data file as an input and outputs an X! Hunter
|
|
## compatible MGF annotated spectrum library.
|
|
##
|
|
## Version 2009.07.08 - first version
|
|
##
|
|
##
|
|
|
|
use strict;
|
|
use CGI qw(:all);
|
|
use CGI::Carp qw(fatalsToBrowser);
|
|
use HTTP::Request::Common qw(POST GET);
|
|
use LWP::UserAgent;
|
|
my $cgi = CGI->new();
|
|
|
|
require "./defines.pl";
|
|
require "./common.pl";
|
|
|
|
my $file_version = "plibrary.pl, v. 2009.08.18";
|
|
|
|
my $gvalue = GetGsite('gpmdb_url');
|
|
my $wvalue = GetGsite('wiki_url');
|
|
my $mvalue= GetGsite('mrm_url');
|
|
|
|
my $path = get_root() . $cgi->param('path');
|
|
my $url = $cgi->param('path');
|
|
|
|
my $label;
|
|
my @taxa;
|
|
if(not open(INPUT,"<$path")) {
|
|
if(open(INPUT,"<$path.gz")) {
|
|
close(INPUT);
|
|
system("gzip -d $path.gz");
|
|
open(INPUT,"<$path");
|
|
}
|
|
}
|
|
|
|
while(<INPUT>) {
|
|
if(/\<bioml/) {
|
|
if(/label=\"/) {
|
|
s/.*label=\"(.*?)\".*/$1/i;
|
|
$label = $_;
|
|
}
|
|
close(INPUT);
|
|
}
|
|
}
|
|
close(INPUT);
|
|
$label =~ s/\\/-/g;
|
|
$label =~ s/:/·/g;
|
|
$label =~ s/\//-/g;
|
|
$label =~ s/·\-/·/g;
|
|
if(length($label) == 0) {
|
|
$label = "main model listing";
|
|
}
|
|
|
|
my $gpm = $path;
|
|
if($gpm =~ /GPM[A-Z]*[0-9]{11}\./i) {
|
|
$gpm =~ s/.*(GPM[A-Z]*[0-9]{11})\..*/$1/i;
|
|
}
|
|
else {
|
|
($gpm) = $gpm =~ /.+\/(.+?)\..*?/;
|
|
}
|
|
|
|
|
|
open(INPUT,"<$path") or die "$path not found";
|
|
my @libraries;
|
|
my @expects;
|
|
my @indices;
|
|
my %index;
|
|
while(<INPUT>) {
|
|
if(/\<group/ and /type\=\"model\"/) {
|
|
my @group;
|
|
chomp($_);
|
|
push(@group,$_);
|
|
$_ = <INPUT>;
|
|
while($_ and not /\<\/group\>\<\/group\>/) {
|
|
chomp($_);
|
|
push(@group,$_);
|
|
$_ = <INPUT>;
|
|
}
|
|
my ($l,$e,$i) = ProcessGroup(\@group);
|
|
if($index{$i}) {
|
|
if($e < $index{$i}) {
|
|
$index{$i} = $e;
|
|
}
|
|
}
|
|
else {
|
|
$index{$i} = $e;
|
|
}
|
|
push(@libraries,$l);
|
|
push(@indices,$i);
|
|
push(@expects,$e);
|
|
}
|
|
}
|
|
close(INPUT);
|
|
my $libsize = 0;
|
|
my $x = 0;
|
|
while($x < scalar(@expects)) {
|
|
if(1 or $index{@indices[$x]} == @expects[$x]) {
|
|
$libsize++;
|
|
}
|
|
$x++;
|
|
}
|
|
PrintHead($gpm,$libsize);
|
|
$x = 0;
|
|
while($x < scalar(@expects)) {
|
|
if(1 or $index{@indices[$x]} == @expects[$x]) {
|
|
print @libraries[$x];
|
|
}
|
|
$x++;
|
|
}
|
|
|
|
sub GetLibrarySize
|
|
{
|
|
my ($_p) = @_;
|
|
open(IN,"<$_p");
|
|
my $size = 0;
|
|
while(<IN>) {
|
|
if(/\<group/ and /type\=\"model\"/) {
|
|
$size++;
|
|
}
|
|
}
|
|
return $size;
|
|
}
|
|
|
|
sub PrintHead
|
|
{
|
|
my ($_h,$_l) = @_;
|
|
print qq(Content-type: text/plain\nContent-disposition: attachment; filename=$_h.mgf\n\n);
|
|
# print qq(Content-type: text/plain\n\n<pre>);
|
|
print qq(SEARCH=MIS\r\nREPTYPE=Peptide\r\nLIBSIZE=$_l\r\n## created by: $file_version\r\n);
|
|
print qq(## original data set: $gpm\r\n);
|
|
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
|
|
printf "## creation date: %4d.%02d.%02d %02d:%02d:%02d\r\n",$year+1900,$mon+1,$mday,$hour,$min,$sec;
|
|
}
|
|
|
|
sub ProcessGroup
|
|
{
|
|
my ($_g) = @_;
|
|
my $label;
|
|
my @starts;
|
|
my @expects;
|
|
my @sequences;
|
|
my @mods;
|
|
my $g;
|
|
my @labels;
|
|
my $spec = 0;
|
|
my $x = 0;
|
|
my $length = scalar(@$_g);
|
|
my $mz;
|
|
my $z;
|
|
my @xs;
|
|
my @ys;
|
|
while($x < $length) {
|
|
$g = @$_g[$x];
|
|
$x++;
|
|
if($x == 1) {
|
|
($z) = $g =~ /z\=\"(.+?)\"/;
|
|
}
|
|
elsif($g =~ /\<protein/) {
|
|
($label) = $g =~ /label\=\"(.+?)\"/;
|
|
}
|
|
elsif($g =~ /\<domain/) {
|
|
my ($s) = $g =~ /start\=\"(.+?)\"/;
|
|
my $st = $s;
|
|
push(@starts,$s);
|
|
($s) = $g =~ /expect\=\"(.+?)\"/;
|
|
push(@expects,$s);
|
|
($s) = $g =~ /seq\=\"(.+?)\"/;
|
|
push(@sequences,$s);
|
|
push(@labels,"$label\@$st");
|
|
($mz) = $g =~ /mh\=\"(.+?)\"/;
|
|
$mz = ($mz - 1.007276)/$z + 1.007276;
|
|
$mz = sprintf("%.4f",$mz);
|
|
my @m = ();
|
|
while(not $g =~ /\<\/domain/) {
|
|
$g = @$_g[$x];
|
|
$x++;
|
|
if($g =~ /\<aa/) {
|
|
my $t;
|
|
($t) = $g =~ /modified\=\"(.+?)\"/;
|
|
($s) = $g =~ /at\=\"(.+?)\"/;
|
|
$s = $s - $st + 1;
|
|
$t = "$t\@$s";
|
|
if($g =~ /pm\=/) {
|
|
($s) = $g =~ /pm\=\"(.*?)\"/;
|
|
if($s) {
|
|
$t .= ",$s";
|
|
($s) = $g =~ /id\=\"(.*?)\"/;
|
|
if($s) {
|
|
$t .= ",$s";
|
|
}
|
|
}
|
|
}
|
|
push(@m,$t);
|
|
}
|
|
}
|
|
push(@mods,\@m);
|
|
}
|
|
elsif($g =~ /type\=\"support\" label\=\"fragment ion mass spectrum\"/) {
|
|
while($x < $length) {
|
|
$g = @$_g[$x];
|
|
$x++;
|
|
if($g =~ /\<GAML\:Xdata/) {
|
|
$x++;
|
|
$g = @$_g[$x];
|
|
while(not $g =~ /\<\/GAML\:values\>/) {
|
|
my @v = split / /,$g;
|
|
my $b;
|
|
foreach $b(@v) {
|
|
push(@xs,$b);
|
|
}
|
|
$x++;
|
|
$g = @$_g[$x];
|
|
}
|
|
}
|
|
if($g =~ /\<GAML\:Ydata/) {
|
|
$x++;
|
|
$g = @$_g[$x];
|
|
while(not $g =~ /\<\/GAML\:values\>/) {
|
|
my @v = split / /,$g;
|
|
my $b;
|
|
foreach $b(@v) {
|
|
push(@ys,$b);
|
|
}
|
|
$x++;
|
|
$g = @$_g[$x];
|
|
}
|
|
}
|
|
}
|
|
last;
|
|
}
|
|
|
|
}
|
|
my @rank = sort { $ys[$b] <=> $ys[$a] } 0 .. $#ys;
|
|
my @xt;
|
|
my @yt;
|
|
my $count = 0;
|
|
foreach $x(@rank) {
|
|
push(@xt,@xs[$x]);
|
|
push(@yt,@ys[$x]);
|
|
$count++;
|
|
if($count == 20) {
|
|
last;
|
|
}
|
|
}
|
|
@rank = sort { $xt[$a] <=> $xt[$b] } 0 .. $#xt;
|
|
my $out = "BEGIN IONS\r\n";
|
|
my $index = "@sequences[0]$z";
|
|
$out .= "PEPMASS=$mz\r\n";
|
|
$out .= "CHARGE=$z\r\n";
|
|
$out .= "PEPSEQ=" . @sequences[0] . "\r\n";
|
|
$out .= "PEPEXP=" . @expects[0] . "\r\n";
|
|
my $mod = @mods[0];
|
|
foreach $x(@$mod) {
|
|
$out .= "PEPMOD=$x\r\n";
|
|
$index .= $x;
|
|
}
|
|
foreach $x(@labels) {
|
|
$out .= "PEPACC=$x\r\n";
|
|
}
|
|
foreach $x(@rank) {
|
|
$out .= "@xt[$x] @yt[$x]\r\n";
|
|
}
|
|
$out .= "END IONS\r\n\r\n";
|
|
return ($out,@expects[0],$index);
|
|
}
|