package Bio::FastaGrep;

=pod

=head1 NAME

Bio::FastaGrep

=head1 SYNOPSIS

use Bio::FastaGrep qw/fastagrep/;
foreach (fastagrep( qr/\bIPI00580177\b/, '~/data-ext/resources/fasta/FF.ipi.CHICK.*.fasta')){
	my ($f,$i,$h,$s) = @$_; # file, index, header, sequence
	print "$f:$i:$h\n$s\n";
}

=head1 DESCRIPTION

Bio::FastaGrep does a grep-link function on fasta files.  There are a few simple options that you can supply, along with a pattern and a list of files/fileglobs.  The module exports nothing by default but can export fastagrep.  Usage of fastagrep is:

=over 4

=item fastagrep @options, $pattern, @filepatterns

=back

Options are described below.  Pattern is a perl regular expression.  Filepatterns is one or more strings to give to perl's glob function.

=head1 OPTIONS

=over 4

=item -S | -H | -A

=back

The default is -H, which means that only the header of each record is evaluated with the supplied pattern.

-S means that the sequence is evaluated and -A mean all is evaluated.

=over 4

=item -n => $char

=back

The -n option allows the user to specify a difference newline (default is "\\n").  This is quite important!  You can specify 'n', 'r' or 'nr', leaving out the backslashes if you like as fastagrep will correct this automatically.

=head1 AUTHOR

Jimi-Carlo Bukowski-Wills

=head1 SEE ALSO

File::Grep Bio::DB::Fasta Bio::SearchIO

=cut

use strict;
use warnings;
use IO::File;
use Carp;

use base 'Exporter';

our @EXPORT_OK = (qw/fastagrep/);

use subs qw/fastagrep/;

sub fastagrep {
	my $mode = '-H';
	my $newline = "\n";
	local $/ = $newline.'>';
	while($_[0] =~ /^-\w+/){
		if($_[0] eq '-n'){
			shift @_;
			$newline = shift @_;
			$newline =~ s/n/\n/g;
			$newline =~ s/r/\r/g;
		}
		else {
			$mode = shift @_;
		}
	}
	my $pattern = shift @_;
	my @results = ();
	while(@_){
		my $glob = shift @_;
		foreach my $fn(glob $glob){
			my $fh = new IO::File($fn) or croak "Could not read $fn $!";
			my $i = 0;
			while(<$fh>){
				chomp;
				my ($h,$s) = split /$newline/, $_, 2;
				$s =~ s/\s+//gs;
				$h =~ s/^>// unless $i;
				if(
					   ($mode eq '-H' && $h =~ /$pattern/)
					|| ($mode eq '-S' && $s =~ /$pattern/)
					|| ($mode eq '-A' && $h."\n".$s =~ /$pattern/)
				){
					push @results, [$fn,$i,$h,$s];
				}
				$i ++;
			}
		}
	}
	return @results;
}