#!/usr/bin/perl -w
#
# findModifiedPeaklists.pl
#
# Some modifications fall off very easily (O-GlcnAc) and as a consequence
# modified AA don't appear in spectra, but the additional mass is in the
# pre-cursor mass.  This script looks for the modification that has fallen
# off and keeps only those peaklists with the correct peak.  Then the
# modification's mass - 1 is subtracted from the PEPMASS and the new set
# of peaklists returned for submission to library searches.
#
# ver    date        author      description
# 0.6    2006.03.23  Aenoch      Adapt to work on LTQ files.
# 0.4    2005.01.10  Aenoch      Another correction for looking for 
#                                precursor with +3 and higher charge.
# 0.3    2004.11.30  Aenoch      Correct looking for precursor with
#                                charge state, increase tolerance default.
# 0.1    2004.08.17  Aenoch      Initial version.

use strict;
use Getopt::Long;
$/ = "\r\n";
my (  $param_peak_mass,
      $param_peak_mass_tolerance,
      $param_add_filename_to_title,
      $param_find_precursor,
      $param_precursor_mass_tolerance,
      $param_count,
      $param_help,
      $param_verbose,
      $param_version,
      $param_output_filename,
      $param_min_intensity,
      $param_oglcnac 
) = ();
&GetOptions( "peakmass=f" => \$param_peak_mass,
             "count=n" => \$param_count,
             "oglcnac" => \$param_oglcnac,
             "output=s" => \$param_output_filename,
             "min_intensity=f" => \$param_min_intensity,
             "tolerance=f" => \$param_peak_mass_tolerance,
             "find_precursor" => \$param_find_precursor,
             "precursor_tolerance=f" => \$param_precursor_mass_tolerance,
             "add_filename_to_title" => \$param_add_filename_to_title,
             "verbose" => \$param_verbose,
             "version" => \$param_version,
             "help" => \$param_help );

my $version = "0.6";
if ( $param_version ) {
   print "version = $version\n";
   exit;
}

if ( $param_help or $#ARGV == -1 ) {
   $0 =~ /[^\/]*$/;
   my $program_name = $&;
   die <<EOF;
usage: $program_name [parameters] mascot_tmp_file1 [mascot_tmp_file2...]

Perl script to find peaklists that contain peaks with --peakmass masses and
extract them from Mascot.dll generated peaklists.  Subtract ( --peakmass -
1 ) / charge from the PEPMASS value to simulate the modification falling
off the precursor peptide.  Output to STDOUT or to the named file.  Can read
multiple tmp files; output concatenated.

Parameters ( [] optional; S == string; N == integer; F == floating point )
  [--help]             Print this message.
  [--peakmass=F]       Look for peaklists with a peak with this mass.
  [--tolerance=F]      Peaks +/- this tolerance will match --peakmass, default
                       of 0.03.
  [--min_intensity=F]  Minimum intensity in order to be called a peak.
                       Defaults to 0.0.
  [--find_precursor]   Look for the precursor mass after the modification has
                       fallen off.
  [--precursor_tolerance=F]
                       Accept precursor mass without modification +/- this 
                       tolerance.  Default to 0.4.
  [--output=S]         Output to the named file.
  [--oglcnac]          Subtract the mass for O-GlcnAC modifications, overrides
                       the --peakmass parameter with 204.08; --tolerance
                       parameter with 0.03 can be overridden.
  [--add_filename_to_title]
                       Append the filename to the TITLE= line in the peaklist.

EOF
}

our $VERBOSE = ( defined $param_verbose ? $param_verbose : 0 );

my ( $line, $pepmass, $reduced_pepmass, $charge, $elution, $file, $peak,
     $output, $pepmass_count ) = ();
my @pl = ();

if ( $param_output_filename ) {
   open WH, ">$param_output_filename" or die "cannot open file \"$param_output_filename\" for write\n";
   $output = \*WH;
}
else {
   $output = \*STDOUT;
}

if ( $param_oglcnac ) {
   $param_peak_mass = 204.08;
   print STDERR "using oglcnac definitions with --peakmass=$param_peak_mass\n" if $::VERBOSE;
}

if ( not $param_peak_mass ) {
   die "need to specify --peakmass or a named modification (e.g., --oglcnac)\n\n";
}

#
# set defaults
#
$param_min_intensity = ( $param_min_intensity ? $param_min_intensity : 0.0 );
$param_count = ( defined $param_count ? $param_count : 1 );
$param_peak_mass_tolerance = ( defined $param_peak_mass_tolerance ?
                               $param_peak_mass_tolerance : 
                               0.03 );
if ( $param_find_precursor ) {
   $param_precursor_mass_tolerance = ( defined $param_precursor_mass_tolerance ? $param_precursor_mass_tolerance : 0.4 );
}

if ( $::VERBOSE ) {
   print STDERR <<EOF;
parameters:
   --peakmass=$param_peak_mass
   --tolerance=$param_peak_mass_tolerance
   --min_intentity=$param_min_intensity
   --find_precursor $param_find_precursor
   --precursor_tolerance=$param_precursor_mass_tolerance
   --output=$param_output_filename
   --add_filename_to_title=$param_add_filename_to_title
EOF
}


$pepmass_count = 0;
foreach $file ( @ARGV ) {
   open RH, $file or die "cannot open file \"$file\" for read\n";
   if ( $::VERBOSE ) {
      print STDERR "FILENAME = $file\n";
   }

   while ( ($pepmass, $charge, $elution) = &get_next_peaklist( \*RH, \@pl ) ) {
      if ( $pepmass and $charge ) {
         $pepmass_count++;
         if ( &has_peak( $param_peak_mass, $param_peak_mass_tolerance, \@pl ) >= $param_min_intensity ) {
            print STDERR "found peak at elution = $elution\n" if $::VERBOSE;
            if ( $param_find_precursor ) {
               if ( $charge > 1 ) {
                  $peak = ( $pepmass * $charge - ( $charge - 2 ) - ( $param_peak_mass - 1 ) ) / ( $charge - 1 );
                  unless ( &has_peak( $peak, $param_precursor_mass_tolerance, \@pl ) >= $param_min_intensity ) {
                     @pl = ();
                     next;
                  }
                  print STDERR "found precursor after losing modification at elution = $elution\n" if $::VERBOSE;
                  &subtract_thompson_from_peaklist( ($param_peak_mass - 1) / $charge, \@pl );
                  if ( $param_add_filename_to_title ) {
                     &add_to_title( \@pl, $file );
                  }
                  print $output @pl;
               }
            }
         }
      }
      else {
#         print $output @pl;
      }
      @pl = ();
   }
   close RH;
}

close $output if $param_output_filename;

exit 0;

## end main ##



sub get_next_peaklist {
   my $rh = shift @_;
   my $array = shift @_;

   my $line;
   my $return_value = 0;
   my $index = 0;
   my $pepmass = 0;
   my $charge = 0;
   my $elution = 0;

ION: while ( $line = <$rh> ) {
SW:   for ( $line ) {
         ( /^BEGIN IONS/ ) and do {
            # not the first read line
            if ( $index ) {
               # rewind the file handle
               seek $rh, - length( $line ), '01';
               $return_value = "not_peaklist";
               last ION;
            }
         };
         ( /^PEPMASS=/ ) and do {
            $pepmass = $';
            chomp $pepmass;
            $pepmass =~ s/\s+.*//;
         };
         ( /^CHARGE=/ ) and do {
            $charge = $';
            chomp $charge;
            $charge =~ s/[+-]//;
         };
         ( /^TITLE=Elution from: ([^ ]*)/ ) and do {
            $elution = $1;
         };
         ( /Elution: ([^ ]*) min/ ) and do {
            $elution = $1;
         };
         ( /\(rt=([\d\.]*)\)/ ) and do {
            $elution = $1;
         };
         ( 1 ) and do {
            if ( $index == 0 ) {
            }
            $index++;
            push @$array, $line;
         };
         ( /^END IONS/ ) and do {
            $return_value = "peaklist";
            last ION;
         };
     }
  }
  if ( eof( $rh ) and not $return_value ) {
     return ();
  }
  elsif ( $return_value eq "not_peaklist" ) {
     return( 0, 0, 0 );
  }
  else {
     return( $pepmass, $charge, $elution );
  }

}
## end get_next_peaklist ##

sub subtract_thompson_from_peaklist {
   my $thompson_to_subtract = shift @_;
   my $pl = shift @_;

   my ( $old_pepmass, $new_pepmass, $i ) = ();

   for ( $i = 0; $i < scalar @$pl; $i++ ) {

      if ( $$pl[$i] =~ /PEPMASS=/ ) {
         $old_pepmass = $';
         chomp $old_pepmass;
         $old_pepmass =~ s/\s+.*//;
         $new_pepmass = $old_pepmass - $thompson_to_subtract;
         $$pl[$i] = "PEPMASS=$new_pepmass" . $/;
      }
   }
}
## end subtract_thompson_from_peaklist ##

sub has_peak {
   my $target_mass = shift @_;
   my $param_peak_mass_tolerance = shift @_;
   my $pl = shift @_;

   my ( $line, $mass, $intensity ) = ();

   $charge =~ s/[+-]//;
   foreach $line ( @$pl ) {
      if ( $line =~ /^\d/ ) {
         ( $mass, $intensity ) = split /\s/, $line;
         chomp $intensity;
         if ( ( ($target_mass - $param_peak_mass_tolerance) <= $mass ) and
              ( $mass <= ($target_mass + $param_peak_mass_tolerance) ) ) {
            return $intensity;
         }
      }
   }
   return -1;
}
## end has_peak ##

sub add_to_title {
   my $pl = shift @_;
   my $text = shift @_;
   my $i = 0;

   for ( $i = 0; $i < scalar @$pl; $i++ ) {
      if ( $$pl[$i] =~ /TITLE=/ ) {
         $$pl[$i] =~ s/\r?\n/ $text$&/;
      }
   }
}
## end add_to_title

__END__
