#!/usr/bin/perl
use strict;
use warnings;

# Version number of this script.
my $VERSION = 1.0;

# Modules used by the rest of the script.
use Cwd;
use Lingua::DxExtractor;
use Text::CSV;

# One CSV parser is shared by the config reader and the data-file reader.
# It is created with the binary option enabled (see the Text::CSV
# documentation for what that option permits inside fields).
my $csv = Text::CSV->new( { binary => 1 } );
die "Cannot use CSV: " . Text::CSV->error_diag() unless $csv;

# config.txt and the data files are looked up in the directory the script
# is run from.
my $dir = getcwd();

# Phrase lists and the start phrase read from config.txt. They are consumed
# later when the Lingua::DxExtractor object is constructed.
my ( @target, @skip, @absolute_positive, @absolute_negative, $start_phrase );

# Name of the "#section" currently being read. It starts as an empty string
# so that entries appearing before the first section header are ignored
# instead of raising "uninitialized value" warnings in the comparisons below.
my $active_section = '';

# Sections whose entries accumulate into a list, keyed by section name.
my %list_for = (
    target_phrases               => \@target,
    skip_phrases                 => \@skip,
    absolute_negative_assertions => \@absolute_negative,
    absolute_positive_assertions => \@absolute_positive,
);

my $config_path = "$dir/config.txt";

# Three-argument open: the path is never interpreted as a mode or a pipe.
open my $config, '<', $config_path or die "$config_path $!";

# The config file is read with the shared CSV parser; only the first field
# of each line is used.
while ( my $row = $csv->getline( $config ) ) {
    my $entry = $row->[0];
    next unless defined $entry;

    # Strip trailing whitespace BEFORE looking at the line, so that a header
    # such as "#target_phrases " is still recognised and a whitespace-only
    # line is skipped rather than stored as an empty phrase.
    $entry =~ s/\s+$//;
    next unless $entry;

    # A line starting with '#' switches the active section.
    if ( $entry =~ /^#(.*)\Z/ ) {
        $active_section = $1;
        next;
    }

    if ( $active_section eq 'start_phrase' ) {
        # Single-valued setting: the last entry in the section wins.
        $start_phrase = $entry;
    }
    elsif ( my $list = $list_for{$active_section} ) {
        push @$list, $entry;
    }
    # Entries under an unrecognised section name are ignored.
}

# getline returns false both at end of file and on a parse error; make sure
# the loop ended because the whole file was read, otherwise the phrase lists
# would be silently incomplete.
$csv->eof or die "Error parsing $config_path: " . $csv->error_diag;

close $config;

# Constructor arguments for the extractor, filled from the values collected
# while reading config.txt. Note that the config section names
# (absolute_positive_assertions / absolute_negative_assertions) map onto the
# absolute_present_phrases / absolute_negative_phrases keys here.
my %extractor_options = (
    target_phrases            => \@target,
    skip_phrases              => \@skip,
    absolute_present_phrases  => \@absolute_positive,
    absolute_negative_phrases => \@absolute_negative,
    start_phrase              => $start_phrase,
);

# Single extractor instance reused for every report in every data file.
my $extractor = Lingua::DxExtractor->new( \%extractor_options );

# Process every CSV data file in $dir: the first row is copied as a header
# with three extra column names, and every following row is written back with
# the extractor's outcome, ambiguous flag and debug text appended.

# Quote one output field: undefined values become empty strings and embedded
# double quotes are doubled so the generated file stays valid CSV.
my $quote_field = sub {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/"/""/g;
    return qq{"$value"};
};

# Take a snapshot of the directory listing before any output is written.
# Result files are created in this same directory, so they must not be picked
# up as input -- neither ones created during this run nor ones left over from
# an earlier run. Only regular files ending in ".csv" (any case) qualify.
opendir my $dh, $dir or die "Can't open directory $dir: $!\n";
my @data_files =
    grep { /\.csv\z/i && !/^result_/ && -f "$dir/$_" } readdir $dh;
closedir $dh;

for my $file (@data_files) {
    my $in_path = "$dir/$file";
    open my $fh, '<', $in_path or die "$in_path: $!";

    # Output name: lower-cased input name without its ".csv" extension,
    # prefixed with "result_".
    ( my $base = lc $file ) =~ s/\.csv\z//;
    my $out_path = "result_$base.csv";
    open my $out, '>', $out_path or die "$out_path: $!";

    my $row_count = 0;
    while ( my $row = $csv->getline( $fh ) ) {
        if ( $row_count++ == 0 ) {
            # Header row: original column names plus the three new ones.
            print {$out} join( ', ', @$row ), ", Outcome, Ambiguous?, Debug\n";
        }
        else {
            # The report text is expected in the first column.
            my $answer    = $extractor->process_text( $row->[0] );
            my $ambiguous = $extractor->ambiguous || 0;
            my $debug     = $extractor->debug;

            my @fields = ( @$row, $answer, $ambiguous, $debug );
            print {$out} join( ',', map { $quote_field->($_) } @fields ), "\n";
        }

        # Reset the extractor after every row (header included) so it is
        # fresh for the next report.
        $extractor->reset;
    }

    # getline returns false on a parse error as well as at end of file;
    # report a premature stop instead of silently truncating the output.
    $csv->eof
        or warn "Stopped reading $in_path early: " . $csv->error_diag . "\n";

    # Buffered write errors only surface at close time.
    close $out or die "$out_path: $!";
    close $fh;
}

=head1 NAME

simpleNLP - a script that reads through medical reports and flags the presence or absence of a condition in each report.

=head1 DESCRIPTION

Reads a config.txt file in the same directory to define a Lingua::DxExtractor object then reads a csv file in the same directory to parse reports and assign outcomes using the DxExtractor object.

=head1 README

Place this file in a folder with a config.txt and a data file in csv format with column names in the first row and the full-text reports that need to be parsed in the first column. Run the script and a new csv file, named after the data file with a "result_" prefix, will be generated with 3 new columns added for each row: an outcome, an ambiguous flag and a debug section.

The config.txt file should contain a line with #target_phrases followed by phrases (one per line). If needed sections can be added for #skip_phrases, 
#absolute_negative_assertions, #absolute_positive_assertions, and #start_phrase.

=head1 PREREQUISITES

This script requires the C<strict> and C<warnings> modules.  It also requires
C<Text::CSV>, C<Cwd>, and C<Lingua::DxExtractor 2.30> which in turn requires C<Lingua::NegEx>.

=head1 COREQUISITES

=pod OSNAMES

any

=pod SCRIPT CATEGORIES

Fun/Educational

=cut

