#!/usr/bin/env perl

#######################################################################
# Copyright (C) 2006-2008 by Carnegie Mellon University.
#
# @OPENSOURCE_HEADER_START@
#
# Use of the SILK system and related source code is subject to the terms
# of the following licenses:
#
# GNU Public License (GPL) Rights pursuant to Version 2, June 1991
# Government Purpose License Rights (GPLR) pursuant to DFARS 252.225-7013
#
# NO WARRANTY
#
# ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER
# PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY
# PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN
# "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY
# KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT
# LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE
# OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT,
# SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY
# TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF
# WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES.
# LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF
# CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON
# CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE
# DELIVERABLES UNDER THIS LICENSE.
#
# Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie
# Mellon University, its trustees, officers, employees, and agents from
# all claims or demands made against them (and any related losses,
# expenses, or attorney's fees) arising out of, or relating to Licensee's
# and/or its sub licensees' negligent use or willful misuse of or
# negligent conduct or willful misconduct regarding the Software,
# facilities, or other rights or assistance granted by Carnegie Mellon
# University under this License, including, but not limited to, any
# claims of product liability, personal injury, death, damage to
# property, or violation of any laws or regulations.
#
# Carnegie Mellon University Software Engineering Institute authored
# documents are sponsored by the U.S. Department of Defense under
# Contract F19628-00-C-0003. Carnegie Mellon University retains
# copyrights in all material produced under this contract. The U.S.
# Government retains a non-exclusive, royalty-free license to publish or
# reproduce these documents, or allow others to do so, for U.S.
# Government purposes only pursuant to the copyright license under the
# contract clause at 252.227.7013.
#
# @OPENSOURCE_HEADER_END@
#
#######################################################################
# $SiLK: rwscanquery 12109 2008-07-28 15:43:12Z mthomas $
#######################################################################
# rwscanquery
#
# Query and generate reports from a network scan database.
#######################################################################

use strict;
use warnings;

use Data::Dumper;
use FindBin;
use IO::File;
use Pod::Usage;
use File::Temp;

### Config

my $conf_db_driver;
my $conf_db_user;
my $conf_db_pass;
my $conf_db_instance;
my $conf_rw_in_class;
my $conf_rw_in_type;
my $conf_rw_out_class;
my $conf_rw_out_type;

### Prototypes for subs defined later

sub db_connect_oracle();
sub db_connect_postgresql();
sub db_connect_mysql();
sub load_rcfile();
sub val_date (\$);
sub val_range (\$$;\$);
sub val_ip (\$$);
sub val_set (\$$);
sub val_text (\$$;%);
sub val_bool (\$$);
sub do_query($);
sub do_query2($);    # hack for testing DBD::Oracle
sub write_standard_results($);
sub write_export_results($);
sub write_volume_results($);
sub write_scan_set($);
sub write_scan_flows($);
sub write_resp_flows($);

### Argument processing

my $opt_outfile;

my $opt_start_hour;
my $opt_end_hour;
my $opt_saddress;
my $opt_sipset;
my $opt_daddress;
my $opt_dipset;
my $opt_report = "standard";
my $opt_volume_summary;
my $opt_show_header = 0;
my $opt_columnar    = 0;

my $opt_database;

my $opt_verbose;

my $opt_help;
my $opt_man;

my $appname = $0;
$appname =~ s/.*\///;

# The queries below are optimized based on the fact that scans in the database
# have a maximum duration of about an hour.  There are some cases where a scan
# for a particular hour can begin in the previous hour, though, so the date
# arithmetic looks back (and ahead) of the time period in question a little
# bit.  Also, note that as of this writing, only the Oracle queries have been
# heavily performance tested, though all have been tested for proper selection.

my %queries = (
    "oracle" => {
        "standard" => q{
            SELECT s.id, s.sip, s.stime, s.etime, s.proto,
              s.flows, s.packets, s.bytes
            FROM scans s
            WHERE s.stime < to_date('$end_hour') + 1/24
            AND s.etime >= to_date('$start_hour')
            AND s.stime >= to_date('$start_hour') - 1/24
            AND s.etime < to_date('$end_hour') + 2/24
            $saddress_part
            $sipset_part
        },
        "volume" => q{
            SELECT TO_CHAR(s.stime, 'YYYY/MM/DD') AS scan_date,
            SUM(s.flows) AS flows,
            SUM(s.packets) AS pkts,
            SUM(s.bytes) AS bytes
            FROM scans s
            WHERE s.stime < to_date('$end_hour') + 1
            AND s.etime >= to_date('$start_hour')
            AND s.stime >= to_date('$start_hour') - 1
            AND s.etime < to_date('$end_hour') + 1
            $saddress_part
            $sipset_part
            GROUP BY TO_CHAR(s.stime, 'YYYY/MM/DD')
            ORDER BY TO_CHAR(s.stime, 'YYYY/MM/DD')
        },
        "scanip" => q{
            SELECT DISTINCT s.sip
            FROM scans s
            WHERE s.stime < to_date('$end_hour') + 1/24
            AND s.etime >= to_date('$start_hour')
            AND s.stime >= to_date('$start_hour') - 1/24
            AND s.etime < to_date('$end_hour') + 2/24
            $saddress_part
            $sipset_part
        },
        "export" => q{
            SELECT s.id, s.sip, s.proto, s.stime, s.etime,
              s.flows, s.packets, s.bytes, s.scan_model, s.scan_prob
            FROM scans s
            WHERE s.stime < to_date('$end_hour') + 1/24
            AND s.etime >= to_date('$start_hour')
            AND s.stime >= to_date('$start_hour') - 1/24
            AND s.etime < to_date('$end_hour') + 2/24
            $saddress_part
            $sipset_part
        }
    },
    "postgresql" => {
        "standard" => q{
            SELECT s.id, s.sip, s.stime, s.etime, s.proto,
              s.flows, s.packets, s.bytes
            FROM scans s
            WHERE s.stime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '1 HOUR'
            AND s.etime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
                - INTERVAL '1 HOUR'
            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '2 HOUR'
            $saddress_part
            $sipset_part
        },
        "volume" => q{
            SELECT to_char(s.stime, 'YYYY/MM/DD') AS scan_date,
            SUM(s.flows) AS flows,
            SUM(s.packets) AS pkts,
            SUM(s.bytes) AS bytes
            FROM scans s
            WHERE s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '1 DAY'
            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
                - INTERVAL '1 DAY'
            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '1 DAY'
            $saddress_part
            $sipset_part
            GROUP BY to_char(s.stime, 'YYYY/MM/DD')
            ORDER BY to_char(s.stime, 'YYYY/MM/DD')
        },
        "scanip" => q{
            SELECT DISTINCT s.sip
            FROM scans s
            WHERE s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '1 HOUR'
            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
                - INTERVAL '1 HOUR'
            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '2 HOUR'
            $saddress_part
            $sipset_part
        },
        "export" => q{
            SELECT s.id, s.sip, s.proto, s.stime, s.etime,
              s.flows, s.packets, s.bytes, s.scan_model, s.scan_prob
            FROM scans s
            WHERE s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '1 HOUR'
            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
            AND s.stime >= to_timestamp('$start_hour', 'YYYY/MM/DD:HH24')
                - INTERVAL '1 HOUR'
            AND s.etime < to_timestamp('$end_hour', 'YYYY/MM/DD:HH24')
                + INTERVAL '2 HOUR'
            $saddress_part
            $sipset_part
        }
    },
    "mysql" => {
        "standard" => q{
            SELECT s.id, s.sip, s.stime, s.etime, s.proto,
              s.flows, s.packets, s.bytes
            FROM scans s
            WHERE s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 1 HOUR
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
                - INTERVAL 1 HOUR
            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 2 HOUR
            $saddress_part
            $sipset_part
        },
        "volume" => q{
            SELECT DATE_FORMAT(s.stime, '%Y/%m/%d:%H') AS scan_date,
            SUM(s.flows) AS flows,
            SUM(s.packets) AS pkts,
            SUM(s.bytes) AS bytes
            FROM scans s
            WHERE s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 1 DAY
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
                - INTERVAL 1 DAY
            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 1 DAY
            $saddress_part
            $sipset_part
            GROUP BY DATE_FORMAT(s.stime, '%Y/%m/%d')
            ORDER BY DATE_FORMAT(s.stime, '%Y/%m/%d')
        },
        "scanip" => q{
            SELECT DISTINCT s.sip
            FROM scans s
            WHERE s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 1 HOUR
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
                - INTERVAL 1 HOUR
            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 2 HOUR
            $saddress_part
            $sipset_part
        },
        "export" => q{
            SELECT s.id, s.sip, s.proto, s.stime, s.etime,
              s.flows, s.packets, s.bytes, s.scan_model, s.scan_prob
            FROM scans s
            WHERE s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 1 HOUR
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
            AND s.stime >= STR_TO_DATE('$start_hour', '%Y/%m/%d:%H')
                - INTERVAL 1 HOUR
            AND s.etime < STR_TO_DATE('$end_hour', '%Y/%m/%d:%H')
                + INTERVAL 2 HOUR
            $saddress_part
            $sipset_part
        }
    }
);


use Getopt::Long;

GetOptions(
    'start-date=s', \$opt_start_hour,
    'end-date=s',   \$opt_end_hour,
    'saddress=s',   \$opt_saddress,
    'sipset=s',     \$opt_sipset,
    'daddress=s',   \$opt_daddress,
    'dipset=s',     \$opt_dipset,
    'report=s',     \$opt_report,
    'show-header!', \$opt_show_header,
    'columnar!',    \$opt_columnar,
    'output-path=s', \$opt_outfile,
    'database=s',   \$opt_database,

    'verbose|v!', \$opt_verbose,

    'help', \$opt_help,
    'man',  \$opt_man

) or pod2usage( -exitval => -1 );

pod2usage( -exitval => 0 ) if $opt_help;
pod2usage( -exitval => 0, -verbose => 2 ) if $opt_man;

require DBI;

# option verification, etc.

my $outfile    = "";    # for Perl filehandles
my $outfile_rw = "";    # for rw tools

if ($opt_outfile) {
    $outfile = $outfile_rw = $opt_outfile;
}
else {
    $outfile    = "&STDOUT";
    $outfile_rw = "stdout";
}

if ( $outfile =~ qr  /(^[\w\+_\040\#\(\)\{\}\[\]\/\-\^,\.:;&%@\\~]+\$?$)/ ) {
    $outfile = $1;
}
else {
    die "$appname: Invalid characters in filename\n";
}

print "writing results to $outfile\n" if $opt_verbose;

if ( !defined $opt_start_hour ) {
    my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
    $opt_start_hour = sprintf "%04d/%02d/%02d", $year + 1900, $month + 1, $day;
}

if (   defined($opt_start_hour)
    && $opt_start_hour !~ /:\d+/
    && $opt_report !~ /volume/ )
{
    $opt_end_hour = $opt_start_hour . ":23";
}

val_date $opt_start_hour;
val_date $opt_end_hour;

val_ip $opt_saddress, 's.sip';
val_set $opt_sipset,  's.sip';

if ( defined $opt_start_hour && !defined $opt_end_hour ) {

    # if no end time and start time has no hour part, query the whole day
    $opt_end_hour = $opt_start_hour;
}

load_rcfile();

if ( !defined $opt_database ) {
    if ( defined $conf_db_instance ) {
        $opt_database = $conf_db_instance;
    }
    else {
        $opt_database = "SCAN";
    }
}

my $dbh;

if ( !defined $conf_db_driver ) {
    die "$appname: No database driver specified\n";
}
elsif ( $conf_db_driver =~ /oracle/i ) {
    $dbh = db_connect_oracle();

}
elsif ( $conf_db_driver =~ /postgresql/i ) {
    $dbh = db_connect_postgresql();
}
elsif ( $conf_db_driver =~ /mysql/i ) {
    $dbh = db_connect_mysql();
}
else {
    die "$appname: Unknown db_driver: $conf_db_driver\n";
}

my $sth;

foreach ($opt_report) {
    if (/standard/i) {
        open( OUTF, ">$outfile" )
            or die "$appname: Cannot open output file '$outfile': $!\n";

        #    $sth = do_standard_query();
        $sth = do_query2("standard");
        write_standard_results($sth);
        close(OUTF);
    }
    elsif (/volume/i) {
        open( OUTF, ">$outfile" )
            or die "$appname: Cannot open output file '$outfile': $!\n";

        #$sth = do_volume_query();
        $sth = do_query2("volume");
        write_volume_results($sth);
        close(OUTF);
    }
    elsif (/scanset/i) {

        #$sth = do_scan_ip_query();
        $sth = do_query2("scanip");
        write_scan_set($sth);
    }
    elsif (/scanflows/i) {

        #$sth = do_scan_ip_query();
        $sth = do_query2("scanip");
        write_scan_flows($sth);
    }
    elsif (/respflows/i) {

        #$sth = do_scan_ip_query();
        $sth = do_query2("scanip");
        write_resp_flows($sth);
    }
    elsif (/export$/i) {
        open( OUTF, ">$outfile" )
            or die "$appname: Cannot open output file '$outfile': $!\n";

        #$sth = do_export_query();
        $sth = do_query2("export");
        write_export_results($sth);
        close(OUTF);
    }
    elsif (/export2/i) {
        open( OUTF, ">$outfile" )
            or die "$appname: Cannot open output file '$outfile': $!\n";

        #$sth = do_export_query();
        $sth = do_query2("export2");
        write_export_results($sth);
        close(OUTF);
    }

}

$sth->finish;
$dbh->disconnect;

exit 0;

# Helper functions

sub val_date (\$) {
    my $in = shift;
    return unless defined $$in;
    if (
        $$in =~ m{ ^
    (\d{1,4})/(\d{1,2})/(\d{1,2}):?(\d{1,2})?
    $ }x
      )
    {

        # Has a full date, may or may not have further date parts
        my ( $year, $mon, $day, $hour ) = ( $1, $2, $3, $4 );
        $hour = 0 unless defined $hour;
        print "$$in -> " if $opt_verbose;
        $$in = sprintf( "%04d/%02d/%02d:%02d", $year, $mon, $day, $hour );
        print "$$in\n" if $opt_verbose;
    }
    else {
        die "$appname: Invalid date: $$in\n";
    }
}

### Validate an IP argument, and parse into ranges

sub val_ip (\$$) {
    my $in  = shift;
    my $var = shift;
    return unless defined $$in;
    my @result = ();
    my @parts = split ',', $$in;
    foreach my $part (@parts) {
        if (
            $part =~ m{ ^(\d{1,3})
      (?:\.(\d{1,3}))?
      (?:\.(\d{1,3}))?
      (?:\.(\d{1,3}))?
      /
      (\d{1,2})$ }x
          )
        {
            my ( $ip1, $ip2, $ip3, $ip4, $mask ) =
              ( $1, $2 || 0, $3 || 0, $4 || 0, $5 );
            if (   ( $ip1 > 255 )
                || ( $ip2 > 255 )
                || ( $ip3 > 255 )
                || ( $ip4 > 255 )
                || ( $mask > 32 ) )
            {
                die "$appname: Invalid address spec: $$in\n";
            }
            my $ip = ( $ip1 << 24 ) | ( $ip2 << 16 ) | ( $ip3 << 8 ) | $ip4;
            my $bitmask = oct( '0b' . ( '1' x $mask . '0' x ( 32 - $mask ) ) );
            my $min_ip  = $ip & $bitmask;
            my $max_ip  = $ip | ~$bitmask;
            push @result, "($var >= $min_ip AND $var <= $max_ip)";
        }
        elsif (
            $part =~ m{ ^ (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3}) -
      (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3}) $ }x
          )
        {
            my ( $ip1, $ip2, $ip3, $ip4, $ip5, $ip6, $ip7, $ip8 ) =
              ( $1, $2, $3, $4, $5, $6, $7, $8 );
            if ( grep { $_ > 255 }
                ( $ip1, $ip2, $ip3, $ip4, $ip5, $ip6, $ip7, $ip8 ) )
            {
                die "$appname: Invalid address spec: $$in\n";
            }
            my $ip_a = ( $ip1 << 24 ) | ( $ip2 << 16 ) | ( $ip3 << 8 ) | $ip4;
            my $ip_b = ( $ip5 << 24 ) | ( $ip6 << 16 ) | ( $ip7 << 8 ) | $ip8;
            push @result, "($var >= $ip_a AND $var <= $ip_b)";
        }
        elsif ( $part =~ m{^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$}x ) {
            my ( $ip1, $ip2, $ip3, $ip4 ) = ( $1, $2, $3, $4 );
            if ( grep { $_ > 255 } ( $ip1, $ip2, $ip3, $ip4 ) ) {
                die "$appname: Invalid address spec: $$in\n";
            }
            my $ip = ( $ip1 << 24 ) | ( $ip2 << 16 ) | ( $ip3 << 8 ) | $ip4;
            push @result, "($var = $ip)";
        }
        else {
            die "$appname: Invalid address spec: $$in\n";
        }
    }
    $$in = join " or\n             ", @result;
}

### Validate an ipset argument, and parse the set into ranges

sub val_set (\$$) {
    my $in  = shift;
    my $var = shift;
    return unless defined $$in;
    if ( !-e $$in ) {
        die "$appname: Invalid (non-existent) ipset file: $$in\n";
    }
    my @result = ();
    my $file   = new IO::File "readset --integer-ips $$in|";
    my $first;
    my $last;
    while (<$file>) {
        chomp;
        s/\s//g;
        if ( !defined $last ) {
            $first = $_;
        }
        elsif ( $_ != $last + 1 ) {
            if ( $first == $last ) {
                push @result, "($var = $first)";
            }
            else {
                push @result, "($var >= $first AND $var <= $last)";
            }
            $first = $_;
        }
        $last = $_;
    }
    if ( defined $last ) {
        if ( $first == $last ) {
            push @result, "($var = $first)";
        }
        else {
            push @result, "($var >= $first AND $var <= $last)";
        }
    }
    $$in = join " or\n             ", @result;
}

sub load_rcfile() {
    my $HOME   = ( getpwuid($<) )[7];
    my $rcfile = "$HOME/.rwscanrc";

    # First, look for .rwscanrc in the current user's home directory
    if ( !-f $rcfile ) {

        # If no .rwscanrc exists in the user's ~, we look in the directory
        # one level up from where the script runs, under the assumption that
        # the script is in a "bin" subdirectory.

        my $script_root = "$FindBin::Bin";
        $script_root =~ s@/bin$@@;
        $rcfile = "$script_root/share/silk/.rwscanrc";
        if ( !-f $rcfile ) {
            $rcfile = "$script_root/.rwscanrc";
            if ( !-f $rcfile ) {
                warn ("$appname: Could not find .rwscanrc file,",
                      " defaults will be used\n");
                return -1;
            }
        }
    }
    my %rcopts;
    open( RCFILE, $rcfile )
        or die "$appname: Cannot open '$rcfile': $!\n";
    {
        while (<RCFILE>) {
            next if (/^#/);
            if (/^(\S+)\s*=\s*(.*)\s*$/) {
                $rcopts{$1} = $2;
            }
        }
        close(RCFILE);
    }

    if ( defined $rcopts{'db_driver'} ) {
        $conf_db_driver = $rcopts{'db_driver'};
        if ( $conf_db_driver =~ /oracle/ ) {
            $conf_db_driver = "oracle";
            die if !eval { require DBD::Oracle; };
        }
        elsif ( $conf_db_driver =~ /postgresql/i ) {
            $conf_db_driver = "postgresql";
            die if !eval { require DBD::Pg; };
        }
        elsif ( $conf_db_driver =~ /mysql/i ) {
            $conf_db_driver = "mysql";
            die if !eval { require DBD::mysql; };
        }
    }
    else {
        warn ("$appname: Warning: db_driver not specified in rcfile,",
              " defaulting to Oracle\n");
        $conf_db_driver = "oracle";
    }

    if ( !defined $rcopts{'oracle_userid'} ) {
        $conf_db_user = $rcopts{'db_userid'};
        $conf_db_pass = $rcopts{'db_password'};
    }
    else {
        warn ("$appname: Warning: using legacy option 'oracle_userid'. ",
              "Please use 'db_userid' instead.\n");
        $conf_db_user = $rcopts{'oracle_userid'};
        $conf_db_pass = $rcopts{'oracle_password'};
    }

    if ( defined $rcopts{'db_instance'} ) {
        $conf_db_instance = $rcopts{'db_instance'};
    }

    if ( defined $rcopts{'rw_in_class'} ) {
        $conf_rw_in_class = $rcopts{'rw_in_class'};
    }

    if ( defined $rcopts{'rw_in_type'} ) {
        $conf_rw_in_type = $rcopts{'rw_in_type'};
    }

    if ( defined $rcopts{'rw_out_class'} ) {
        $conf_rw_out_class = $rcopts{'rw_out_class'};
    }

    if ( defined $rcopts{'rw_out_type'} ) {
        $conf_rw_out_type = $rcopts{'rw_out_type'};
    }
}

sub db_connect_oracle() {
    my $dbh =
      DBI->connect( "dbi:Oracle:$opt_database", $conf_db_user, $conf_db_pass )
      or die "$appname: Oracle connect failed: $!";
    $dbh->do("alter session set NLS_DATE_FORMAT = 'YYYY/MM/DD:HH24:MI:SS'");
    return $dbh;
}

sub db_connect_postgresql() {
    my $dbh =
      DBI->connect( "dbi:Pg:dbname=$opt_database", $conf_db_user,
        $conf_db_pass )
      or die "$appname: Postgress connect failed: $!";

    #  $dbh->do("alter session set NLS_DATE_FORMAT = 'YYYY/MM/DD:HH24:MI:SS'");
    return $dbh;
}

sub db_connect_mysql() {
    my $dbh =
      DBI->connect( "dbi:mysql:$opt_database", $conf_db_user, $conf_db_pass )
      or die "$appname: MySQL connect failed: $!";
    return $dbh;
}

sub do_query($) {
    my ($type) = @_;
    my $saddress_part = "";
    if ( defined $opt_saddress ) {
        $saddress_part = qq{
      AND ($opt_saddress)
    };
    }
    my $sipset_part = "";
    if ( defined $opt_sipset ) {
        $sipset_part = qq{
      AND ($opt_sipset)
    };
    }
    my $query = $queries{$conf_db_driver}{$type};
    $query =~ s/\$saddress_part/$saddress_part/;
    $query =~ s/\$sipset_part/$sipset_part/;

    print "$type query:\n$query\n" if $opt_verbose;

    my $sth = $dbh->prepare( $query, { ora_check_sql => 0 } );

    $sth->bind_param( 1, $opt_end_hour );
    $sth->bind_param( 2, $opt_start_hour );
    my $rv = $sth->execute;
    return $sth;
}

sub do_query2($) {
    my ($type) = @_;
    my $saddress_part = "";
    if ( defined $opt_saddress ) {
        $saddress_part = qq{
      AND ($opt_saddress)
    };
    }

    my $sipset_part = "";
    if ( defined $opt_sipset ) {
        $sipset_part = qq{
      AND ($opt_sipset)
    };
    }

    my $query = $queries{$conf_db_driver}{$type};

    $query =~ s/\$saddress_part/$saddress_part/g;
    $query =~ s/\$sipset_part/$sipset_part/g;

    $query =~ s/\$start_hour/$opt_start_hour/g;
    $query =~ s/\$end_hour/$opt_end_hour/g;

    print "$type query:\n$query\n" if $opt_verbose;

    my $sth = $dbh->prepare( $query, { ora_check_sql => 0 } );

    my $rv = $sth->execute;
    return $sth;
}

sub write_standard_results($) {
    my ($sth) = @_;
    my (
        $s_id,  $s_stime, $s_proto, $s_etime,
        $s_sip, $s_flows, $s_pkts,  $s_bytes
    );

    $sth->bind_columns(
        \(
            $s_id,    $s_sip,   $s_stime, $s_etime,
            $s_proto, $s_flows, $s_pkts,  $s_bytes
        )
    );

    if ($opt_columnar) {
        if ($opt_show_header) {
            printf OUTF "%10s %-19s %-19s %-10s %-15s %10s %10s %14s\n",
              "scan-id", "start-time", "end-time", "protocol", "source-address",
              "flows", "packets", "bytes",;
        }
        open( SAVE_STDERR, ">&STDERR" );
        open( STDERR,      ">/dev/null" );
        eval {
            while ( $sth->fetch )
            {
                my $o_sip = join( '.', unpack( 'C4', pack( 'N', $s_sip ) ) );
                printf OUTF "%10d %-19s %-19s %-10d %-15s %10d %10d %14s\n",
                  $s_id, $s_stime, $s_etime, $s_proto, $o_sip, $s_flows,
                  $s_pkts, $s_bytes;
            }
        };
        open( STDERR, ">&SAVE_STDERR" );
    }
    else {
        if ($opt_show_header) {
            print OUTF
              "scan-id|stime|etime|proto|srcaddr|flows|packets|bytes\n";
        }

        # This song and dance is to make it so that ^C doesn't cause
        # an ugly error message.
        use vars qw( *SAVE_STDERR );    # to avoid warning
        open( SAVE_STDERR, ">&STDERR" );
        open( STDERR,      ">/dev/null" );

        eval {
            while ( $sth->fetch )
            {
                my $o_sip = join( '.', unpack( 'C4', pack( 'N', $s_sip ) ) );
                print OUTF "$s_id|$s_stime|$s_etime|$s_proto|$o_sip|"
                  . "$s_flows|$s_pkts|$s_bytes\n";
            }
        };
        open( STDERR, ">&SAVE_STDERR" );
    }

}

sub write_export_results($) {
    my ($sth) = @_;
    my (
        $s_id,         $s_stime, $s_proto, $s_etime,
        $s_sip,        $s_flows, $s_pkts,  $s_bytes,
        $s_scan_model, $s_scan_prob
    );

    $sth->bind_columns(
        \(
            $s_id,         $s_sip,   $s_proto, $s_stime,
            $s_etime,      $s_flows, $s_pkts,  $s_bytes,
            $s_scan_model, $s_scan_prob
        )
    );

    if ($opt_show_header) {
        print OUTF "id|sip|proto|stime|etime|"
          . "flows|packets|bytes|scan_model|scan_prob\n";
    }

    use vars qw( *SAVE_STDERR );    # to avoid warning
    open( SAVE_STDERR, ">&STDERR" );
    open( STDERR,      ">/dev/null" );

    eval {
        while ( $sth->fetch )
        {
            print OUTF "$s_id|$s_sip|$s_proto|$s_stime|$s_etime|"
              . "$s_flows|$s_pkts|$s_bytes|$s_scan_model|$s_scan_prob\n";
        }
    };
    open( STDERR, ">&SAVE_STDERR" );
}

sub write_volume_results($) {
    my ($sth) = @_;
    my ( $s_stime, $s_flows, $s_pkts, $s_bytes );
    $sth->bind_columns( \( $s_stime, $s_flows, $s_pkts, $s_bytes ) );

    if ($opt_columnar) {
        if ($opt_show_header) {
            printf OUTF "%-19s %10s %10s %14s", "date", "flows", "packets",
              "bytes\n";
        }
        open( SAVE_STDERR, ">&STDERR" );
        open( STDERR,      ">/dev/null" );
        eval {
            while ( $sth->fetch )
            {
                printf OUTF "%-19s %10d %10d %14s\n", $s_stime, $s_flows,
                  $s_pkts, $s_bytes;
            }
        };
        open( STDERR, ">&SAVE_STDERR" );
    }
    else {
        if ($opt_show_header) {
            print OUTF "date|flows|packets|bytes\n";
        }

        # This song and dance is to make it so that ^C doesn't cause
        # an ugly error message.
        use vars qw( *SAVE_STDERR );    # to avoid warning
        open( SAVE_STDERR, ">&STDERR" );
        open( STDERR,      ">/dev/null" );

        eval {
            while ( $sth->fetch )
            {
                print OUTF "$s_stime|$s_flows|$s_pkts|$s_bytes\n";
            }
        };
        open( STDERR, ">&SAVE_STDERR" );
    }
}

sub write_scan_set($) {
    my ($sth) = @_;
    my ($s_srcaddr);

    $sth->bind_columns( \$s_srcaddr );

    if ( $opt_daddress || $opt_dipset ) {
        my @args;
        my $destargs;
        my $set_filename = File::Temp::tmpnam();
        open( IPSET_OUT, "|rwsetbuild stdin '$set_filename'" )
            or die "$appname: rwsetbuild failed: $!";
        while ( $sth->fetch ) {
            my $o_srcaddr =
              join( '.', unpack( 'C4', pack( 'N', $s_srcaddr ) ) );
            print IPSET_OUT $o_srcaddr, "\n"
                or die "$appname: rwsetbuild failed: $!";
        }
        close(IPSET_OUT);

        if ($opt_daddress) {
            $destargs = "--daddress $opt_daddress ";
        }
        else {
            $destargs = "--dipset '$opt_dipset' ";
        }

        system( "rwfilter --start-date $opt_start_hour "
              . "--end-date $opt_end_hour "
              . "--sipset '$set_filename' "
              . "$destargs "
              . "--pass stdout "
              . "| rwset --sip-file '$outfile_rw'" );
        unlink $set_filename;
    }
    else {
        open( OUTF, "|rwsetbuild stdin '$outfile_rw'" )
            or die "$appname: rwsetbuild failed: $!";
        if ( -t OUTF ) {
            die "$appname:Cannot write scan IP set to a terminal.\n"
              . "Please specify an output file on the command line.\n";
        }

        while ( $sth->fetch ) {
            my $o_srcaddr =
              join( '.', unpack( 'C4', pack( 'N', $s_srcaddr ) ) );
            print OUTF $o_srcaddr, "\n"
                or die "$appname: rwsetbuild failed: $!";
        }
        close(OUTF);
    }

}

sub write_scan_flows($) {
    my ($sth) = @_;
    my ($s_srcaddr);
    my $set_filename = File::Temp::tmpnam();
    my @args;

    $sth->bind_columns( \$s_srcaddr );

    open( IPSET_OUT, "|rwsetbuild stdin '$set_filename'" )
      or die "$appname: rwsetbuild failed: $!";
    while ( $sth->fetch ) {
        my $o_srcaddr = join( '.', unpack( 'C4', pack( 'N', $s_srcaddr ) ) );
        print IPSET_OUT $o_srcaddr, "\n"
            or die "$appname: rwsetbuild failed: $!";
    }
    close(IPSET_OUT);

    push( @args,
        '--start-date', $opt_start_hour, '--end-date', $opt_end_hour,
        '--sipset',     $set_filename,   '--pass',     $outfile_rw );

    if ( defined($conf_rw_in_class) ) {
        push( @args, '--class', $conf_rw_in_class );
    }
    if ( defined($conf_rw_in_type) ) {
        push( @args, '--type', $conf_rw_in_type );
    }

    if ( defined($opt_daddress) ) {
        push( @args, '--daddress', $opt_daddress );
    }

    if ( defined($opt_dipset) ) {
        push( @args, '--dipset', $opt_dipset );
    }

    print Dumper(@args);

    system( 'rwfilter', @args );


    unlink $set_filename;
}

sub write_resp_flows($) {
    my ($sth) = @_;
    my ($s_srcaddr);
    my $set_filename = File::Temp::tmpnam();
    my @args;

    $sth->bind_columns( \$s_srcaddr );
    open( IPSET_OUT, "|rwsetbuild stdin '$set_filename'" )
        or die "$appname: rwsetbuild failed: $!";
    while ( $sth->fetch ) {
        my $o_srcaddr = join( '.', unpack( 'C4', pack( 'N', $s_srcaddr ) ) );
        print IPSET_OUT $o_srcaddr, "\n"
            or die "$appname: rwsetbuild failed: $!";
    }
    close(IPSET_OUT);

    push( @args,
        '--start-date', $opt_start_hour, '--end-date', $opt_end_hour,
        '--dipset',     $set_filename,   '--pass',     $outfile_rw );

    # Restrict filter to outgoing flows only.
    if ( defined($conf_rw_out_class) ) {
        push( @args, '--class', $conf_rw_out_class );
    }
    if ( defined($conf_rw_out_type) ) {
        push( @args, '--type', $conf_rw_out_type );
    }

    # These two look funny at first glance.  However, opt_daddress or
    # opt_dipset specify the destinations of scans, and we want the response
    # flows sent back to the scanners.
    if ( defined($opt_daddress) ) {
        push( @args, '--saddress', $opt_daddress );
    }
    if ( defined($opt_dipset) ) {
        push( @args, '--sipset', $opt_dipset );
    }

    system( 'rwfilter', @args );

    unlink $set_filename;
}

__END__

=head1 NAME

B<rwscanquery> - Query the network scan database

=head1 SYNOPSIS

 rwscanquery [options]

Report Options:

  --start-date=yyyy/mm/dd:hh Report on scans active after this date.
  --end-date=yyyy/mm/dd:hh   Defaults to start-date.

  --report=type              Select query and output options.  Allowed
                             values for "type" are standard, volume,
                             scanset, scanflows, respflows, and export

  --saddress=addr-spec       Show scans originating from matching hosts.
  --sipset=set-file          Show scans originating from hosts in set.

  --daddress=addr-spec       Show only scans targeting matching hosts.
  --dipset=set-file          Show only scans targeting hosts in set.

  --show-header              Display column header at start of output.
  --columnar                 Display more human-readable columnar view.
  --output-path=path         Write results to the specified file.

Configuration Options:

  --database=dbname          Query an alternate scan database

Help Options:

  --help                     Display this brief help message.
  --man                      Display the full documentation.

=head1 DESCRIPTION

B<rwscanquery> queries the network scan database.  Selection criteria
and output formats are specified via command line options described below.
Output will go to standard output by default, or I<output_file> if
one is specified.

=head1 REPORT OPTIONS

=over 4

=item B<--start-date>=I<yyyy/mm/dd:hh>

Display scans which were active after this hour.  If this argument
contains a date with no hour, and no B<--end-date> option is
specified, scans for that entire day will be returned.  If this option
is not specified at all, scans for the current day (based on the local
time on the host machine) will be returned.

=over 4

=item Example:

  rwscanquery --start-date=2005/04/19:21

This command would display information on all scans occurring in the
hour from 21:00 up to but not including 22:00 on April 19, 2005.

=back

=item B<--end-date>=I<yyyy/mm/dd:hh>

Display scans which were active before the end of this hour.  If no
end-date is given, defaults to the same as start-date.

=over 4

=item Example:

  rwscanquery --start-date=2005/04/19:21 --end-date=2005/04/19:22

This command would display information on all scans occurring after or
including 21:00 on 2005/04/19, up through but not including 23:00 on
2005/04/19.

=back

=item B<--report>=I<type>

Specifies query and output options based on one of the following types:

=over 4

=item C<standard>

Writes one line of output for each scan record in the scan database.  The
format of this output can be altered with the --columnar option described
below.

=item C<volume>

Writes a daily scan activity volume summary report for each day within the
time period.  The format of this output can be altered with the --show-header
and --columnar options described below.

=item C<scanset>

Writes an IP set file containing the IP addresses which were the sources of
scan activity during the selected time period.

=item C<scanflows>

Writes a SiLK Flow file containing all flows originating from scanning
IP addresses within the specified time period.  This flow data will
include flows originating from any host that would be listed as a scan
source by your query, from any time within the time period specified
by I<--start-date> and I<--end-date>.  Note that this may include
flows that were not identified by the scan analysis as being part of a
scan.

=item C<respflows>

Writes a SiLK Flow file containing all flows sent to scanning IP
addresses within the specified time period.

=item C<export>

Write output consistent with the output format of the B<rwscan(1)> tool.

=back

If no --report option is specified, the default is the "standard" report.

=item B<--saddress>=I<addr-spec>

Display scans originating from hosts described in I<addr-spec>.
I<addr-spec> should be a list of addresses, address ranges, and CIDR
blocks.  Only scans originating at hosts in the list will be
displayed.

=over 4

=item Example:

  rwscanquery --start-date=2005/04/19:21 \
    --saddress=192.168/16,127.0.0.1,255.255.255.0-255.255.255.255

This command would display queries originating from addresses in the
slash-16 block 192.168, or from address 127.0.0.1, or from any address
between 255.255.255.0 and 255.255.255.255, inclusive.

=back

=item B<--sipset>=I<set-file>

Display scans originating from hosts in I<set-file>.  I<set-file>
should be a standard SiLK ipset file.  Note that a very complex set
may take a long time to process, or even fail to return any results.

=over 4

=item Example:

  rwscanquery --start-date=2005/04/19:21 --sipset=MyIPSet.set

This command would display information on all scans in the given hour
which had a source address in the IP set file MyIPSet.set.

=back

=item B<--daddress>=I<ip-mask>

Display scans targeting hosts described in I<ip-mask>.	This option
accepts only a single IP address or wildcard expression as described in
B<rwfilter(1)>.  To match on multiple IPs or networks, use the
B<--dipset> option.

=item B<--dipset>=I<set-file>

Display scans targeting hosts in I<set-file>.  I<set-file> should be a
standard SiLK ipset file.  Note that a very complex set may take a long
time to process, or even fail to return any results.

=item B<--show-header>

Displays a header with a short name for each column as the first line
of output.  By default, no header is displayed.

=item B<--columnar>

Display output in more human-readable columnar format.  By default,
the output is presented as data fields delimited by the `|' character.

=item B<--output-path>

Write results to the specified pathname instead of standard output.

=back

=head1 CONFIGURATION OPTIONS

=over 4

=item B<--database>=I<dbname>

Select a database instance other than the default, which comes from the
B<rwscan> configuration file described below.

=back

=head1 OTHER OPTIONS

=over 4

=item B<--help>

Displays a brief usage message and exits.

=back

=over 4

=item B<--man>

Displays full documentation and exits.  Better formatting may be
achieved by using `perldoc .../rwscanquery' instead.

=back

=head1 CONFIGURATION

B<rwscanquery> reads certain configuration information from a file named
F<.rwscanrc>.  This file is first looked for in the current user's home
directory, and if not found, one directory up from where B<rwscanquery> is
located.  The format of this file is name=value pairs, one per line.

The configuration parameters currently read from F<.rwscanrc> are:

=over 2

=item C<db_driver>

The type of database to connect to.  "oracle", "postgresql", and "mysql" are
currently supported.

=item C<db_userid>

The userid used to connect to the scan database.

=item C<db_password>

The password used to connect to the scan database.

=item C<db_instance>

The name of the database instance to connect to if none is provided with the
B<--database> command line switch.  If neither this configuration option nor
the B<--database> command line option are specified, the hard-coded default
database instance "SCAN" will be used.

=item C<rw_in_class>

The input class used to query scan flows.  Used for the "scanflows" report
type.  If not specified, B<rwfilter>'s default will be used.

=item C<rw_in_type>

The input type(s) used to query scan flows.  Used for the "scanflows" report
type.  If not specified, F<rwfilter>'s default will be used.

=item C<rw_out_class>

The ouput class used to query scan flows.  Used for the "respflows" report
type.  If not specified, B<rwfilter>'s default will be used.

=item C<rw_out_type>

The ouput type used to query scan flows.  Used for the "respflows" report
type.  If not specified, B<rwfilter>'s default will be used.

=back

=head1 SEE ALSO

B<rwscan(1)>, B<rwfilter(1)>

=cut
