/*
** Copyright (C) 2005-2013 by Carnegie Mellon University.
**
** @OPENSOURCE_HEADER_START@
**
** Use of the SILK system and related source code is subject to the terms
** of the following licenses:
**
** GNU Public License (GPL) Rights pursuant to Version 2, June 1991
** Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013
**
** NO WARRANTY
**
** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER
** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY
** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN
** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY
** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT
** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE,
** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE
** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT,
** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY
** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF
** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES.
** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF
** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON
** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE
** DELIVERABLES UNDER THIS LICENSE.
**
** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie
** Mellon University, its trustees, officers, employees, and agents from
** all claims or demands made against them (and any related losses,
** expenses, or attorney's fees) arising out of, or relating to Licensee's
** and/or its sub licensees' negligent use or willful misuse of or
** negligent conduct or willful misconduct regarding the Software,
** facilities, or other rights or assistance granted by Carnegie Mellon
** University under this License, including, but not limited to, any
** claims of product liability, personal injury, death, damage to
** property, or violation of any laws or regulations.
**
** Carnegie Mellon University Software Engineering Institute authored
** documents are sponsored by the U.S. Department of Defense under
** Contract FA8721-05-C-0003. Carnegie Mellon University retains
** copyrights in all material produced under this contract. The U.S.
** Government retains a non-exclusive, royalty-free license to publish or
** reproduce these documents, or allow others to do so, for U.S.
** Government purposes only pursuant to the copyright license under the
** contract clause at 252.227.7013.
**
** @OPENSOURCE_HEADER_END@
*/

/*
** rwmatch
**
** rwmatch.c is a utility which takes two files (now called in and
** out) and produces a new file where the contents are linked together
** in the form (saddress, values) on one side and (daddress, values)
** on the other side.  apart from the saddress/daddress relationship,
** any other mating rules are specified by the command line.
**
** This is necessary because there are several operations where mating
** is not direct reversal; the particular case which is driving this
** application is traceroute, where the only rationale is a close
** relationship in time between s and d address, and one of them is a
** UDP signal with an ICMP response.
**
** command arguments:
**
** --relate:    takes fields as pairs for relationship.  The first
**              value is the field in file 1, the second value is the
**              field in file 2.
**
**              for example, the default could be --relate=1,2
**
**              multiple --relate predicates are accepted.
**              Traditional tcp/ip matching would be:
**              --relate=1,2 --relate=2,1 --relate=3,4 --relate=4,3
**
** --delta:     the number of seconds difference we'll allow between
**              two records before considering them similar.  Defaults
**              to 0.
**
** --unmatched: Include unmatched queries in match file.  This is a
**              less than general solution, but allows scans with scan
**              responses to be captured in a single file.
**
** --unmated:   output path for unmatched records; defaults to
**              /dev/null.  A record must have a response before it is
**              ejected.  So we're always sending out at least two
**              records.
**
** Apart from that; rwmate will annotate the nhip field with an index
** indicating the 'mating flow' that a record belongs to.
**
*/

#include <silk/silk.h>

RCSIDENT("$SiLK: rwmatch.c 2628c36fd2c9 2013-02-21 15:43:02Z mthomas $");

#include <silk/rwrec.h>
#include <silk/sksite.h>
#include <silk/skstream.h>
#include <silk/utils.h>


/* TYPEDEFS AND DEFINES */

/* Where to send --help output */
#define USAGE_FH stdout

/* Maximum number of --relate pairs that may be given; this is the
 * number of rows in match_pairs[].  RWMATE_REL_MAX_STR is the same
 * number as a string literal so that it can be pasted into the help
 * text; the two definitions must be kept in sync by hand. */
#define RWMATE_REL_MAX 16
#define RWMATE_REL_MAX_STR "16"


/*
 *  For debugging.  Argument should be surrounded by double-parens:
 *      PRINTDEBUG((stderr, "My output\n"));
 *
 *  When ENABLE_PRINTDEBUG is defined the argument list is handed to
 *  fprintf(); otherwise, unless PRINTDEBUG has been defined some
 *  other way, the macro expands to nothing.
 */
#ifdef ENABLE_PRINTDEBUG
#  define PRINTDEBUG(args) fprintf args
#endif
#ifndef PRINTDEBUG
#  define PRINTDEBUG(args)
#endif


/* Which side of a match a record belongs to.  The values double as
 * the column index into match_pairs[][]. */
typedef enum {
    RWMATE_QUERY    = 0,
    RWMATE_RESPONSE = 1
} match_rec_t;

/* Field identifiers accepted by --relate; the numbering is the one
 * shown to the user in the help text. */
typedef enum {
    RWMATE_FID_SIP   = 1,
    RWMATE_FID_DIP   = 2,
    RWMATE_FID_SPORT = 3,
    RWMATE_FID_DPORT = 4,
    RWMATE_FID_PROTO = 5,
    RWMATE_FID_PKTS  = 6,
    RWMATE_FID_BYTES = 7,
    RWMATE_FID_FLAGS = 8
} fields_enum_t;

/* make certain these are kept in same order as options */
typedef enum {
    ABSOLUTE_DELTA = 0,
    RELATIVE_DELTA = 1,
    INFINITE_DELTA = 2
} delta_enum_t;



/* LOCAL VARIABLES */

/* input and output streams; created, bound and opened in appSetup()
 * and destroyed in appTeardown() */
static skstream_t *query_stream = NULL;
static skstream_t *response_stream = NULL;
static skstream_t *matched_stream = NULL;

/* the pairs of fields to match on.  'count_pairs' is the number of
 * rows of 'match_pairs' that have been filled in by --relate.  Within
 * a row, column RWMATE_QUERY holds the field ID to read from the
 * query record and column RWMATE_RESPONSE holds the field ID to read
 * from the response record. */
static uint8_t count_pairs;
static uint8_t match_pairs[RWMATE_REL_MAX][2];

/* time difference between query and response in milliseconds; set
 * from the --time-delta switch, whose argument is in seconds */
static sktime_t delta_msec = 0;

/* whether to allow the "response" to precede the "query"; for
 * handling traffic where either side may initiate the connection. */
static int symmetric_delta = 0;

/* whether to write unmatched queries/responses in match file; set
 * by the --unmatched switch */
static int write_unmatched_query = 0;
static int write_unmatched_response = 0;

/* time-delta policy and flags; at most one of --absolute-delta,
 * --relative-delta, --infinite-delta may change it */
static delta_enum_t delta_policy = ABSOLUTE_DELTA;

/* the compression method to use when writing the file.
 * sksiteCompmethodOptionsRegister() will set this to the default or
 * to the value the user specifies. */
static sk_compmethod_t comp_method;


/* OPTIONS SETUP */

/* Identifiers for the application's switches.  Each value is used as
 * the last member of the corresponding appOptions[] entry and is what
 * appOptionsHandler() receives as 'opt_index'; the handler and
 * appSetup() also use these values to index appOptions[] when they
 * print a switch name, so the enum and the array must stay in the
 * same order. */
typedef enum {
    OPT_RELATE,
    OPT_TIME_DELTA,
    OPT_SYMMETRIC_DELTA,
    /* next three must be in same order as delta_enum_t */
    OPT_ABSOLUTE_DELTA,
    OPT_RELATIVE_DELTA,
    OPT_INFINITE_DELTA,
    OPT_UNMATCHED
} appOptionsEnum;

/* The switches themselves: name, whether an argument is required, and
 * the appOptionsEnum value identifying the switch. */
static struct option appOptions[] = {
    {"relate",          REQUIRED_ARG, 0, OPT_RELATE},
    {"time-delta",      REQUIRED_ARG, 0, OPT_TIME_DELTA},
    {"symmetric-delta", NO_ARG,       0, OPT_SYMMETRIC_DELTA},
    {"absolute-delta",  NO_ARG,       0, OPT_ABSOLUTE_DELTA},
    {"relative-delta",  NO_ARG,       0, OPT_RELATIVE_DELTA},
    {"infinite-delta",  NO_ARG,       0, OPT_INFINITE_DELTA},
    {"unmatched",       REQUIRED_ARG, 0, OPT_UNMATCHED},
    {0,0,0,0}           /* sentinel entry */
};

/* Help text for the switches: one string per appOptions[] entry, in
 * the same order, followed by a NULL terminator.  appSetup() asserts
 * that this array and appOptions[] have the same number of elements. */
static const char *appHelp[] = {
    /* --relate */
    ("A pair of fields to relate when matching data, given in\n"
     "\tthe form '<query field id>,<response field id>'.  The switch may\n"
     "\tbe repeated up to " RWMATE_REL_MAX_STR " times.  Field IDs:\n"
     "\t    1 source IP          5 protocol    \n"
     "\t    2 destination IP     6 packets     \n"
     "\t    3 source port        7 bytes       \n"
     "\t    4 destination port   8 TCP flags   "),
    /* --time-delta */
    ("Maximum time difference (in seconds) permitted between\n"
     "\ttwo records for relation. May be fractional. Def. 0.000"),
    /* --symmetric-delta */
    ("Also match responses that precede queries by up to\n"
     "\ttime-delta seconds. Def. No"),
    /* --absolute-delta */
    ("Do not include potentially matching flows that start\n"
     "\tmore than time-delta seconds after the end of the initial flow\n"
     "\tof the current match. Def. Yes"),
    /* --relative-delta */
    ("Continue match with flows that start within time-delta\n"
     "\tseconds of the greatest end time seen for previous\n"
     "\tmembers of the current match. Def. No."),
    /* --infinite-delta */
    ("After forming the initial pair of the match, continue\n"
     "\tmatching on fields alone, ignoring time. Def. No."),
    /* --unmatched */
    ("Include unmatched records from QUERY_FILE and/or\n"
     "\tRESPONSE_FILE in OUTPUT_FILE.  Parameter is one of [QqRrBb], where:\n"
     "\tQ / q - query file; R / r - response file, B /b - both"),
    (char *)NULL
};


/* LOCAL FUNCTION PROTOTYPES */

/* switch handler; registered with skOptionsRegister() in appSetup() */
static int  appOptionsHandler(clientData cData, int opt_index, char *opt_arg);
/* fetch the value of the field 'field_id' from 'current_rec' */
static uint32_t getField(fields_enum_t field_id, const rwRec *current_rec);


/* FUNCTION DEFINITIONS */

/*
 *  appUsageLong();
 *
 *    Write the complete usage (--help) text to USAGE_FH: the
 *    application's own switches first, followed by the usage for the
 *    compression-method, notes, and site switches.
 *
 *    This function is handed to skOptionsSetUsageCallback();
 *    skOptionsParse() will call this function and then exit the
 *    program when the --help option is given.
 */
static void appUsageLong(void)
{
#define USAGE_MSG                                                         \
    ("--relate=FIELD_PAIR QUERY_FILE RESPONSE_FILE OUTPUT_FILE\n"         \
     "\tRead SiLK Flow records from the QUERY_FILE and RESPONSE_FILE,\n"  \
     "\tuse the FIELD_PAIR(s) to group the records as queries and\n"      \
     "\tresponses, and write the matched records to OUTPUT_FILE.\n")

    /* application switches */
    skAppStandardUsage(USAGE_FH, USAGE_MSG, appOptions, appHelp);

    /* switches contributed by the library modules */
    sksiteCompmethodOptionsUsage(USAGE_FH);
    skOptionsNotesUsage(USAGE_FH);
    sksiteOptionsUsage(USAGE_FH);
}


/*
 *  appTeardown()
 *
 *    Teardown all modules, close all files, and tidy up all
 *    application state.
 *
 *    This function is idempotent.
 */
static void appTeardown(void)
{
    static int teardownFlag = 0;
    int rv;

    if (teardownFlag) {
        return;
    }
    teardownFlag = 1;

    rv = skStreamDestroy(&query_stream);
    if (rv) {
        skStreamPrintLastErr(query_stream, rv, &skAppPrintErr);
    }

    rv = skStreamDestroy(&response_stream);
    if (rv) {
        skStreamPrintLastErr(response_stream, rv, &skAppPrintErr);
    }

    rv = skStreamDestroy(&matched_stream);
    if (rv) {
        skStreamPrintLastErr(matched_stream, rv, &skAppPrintErr);
    }

    skAppUnregister();
}


/*
 *  appSetup(argc, argv);
 *
 *    Perform all the setup for this application include setting up
 *    required modules, parsing options, etc.  This function should be
 *    passed the same arguments that were passed into main().
 *
 *    In order, the function: registers the application and its
 *    switches; installs appTeardown() as an exit handler; parses the
 *    command line; checks that at least one --relate pair was given;
 *    takes the three positional arguments QUERY_FILE, RESPONSE_FILE,
 *    and OUTPUT_FILE and binds a stream to each; opens the two input
 *    streams and reads their headers; and opens the output stream and
 *    writes its header.
 *
 *    Returns to the caller if all setup succeeds.  If anything fails,
 *    this function will cause the application to exit with a FAILURE
 *    exit status.
 */
static void appSetup(int argc, char **argv)
{
    SILK_FEATURES_DEFINE_STRUCT(features);
    int arg_index;
    int rv;

    /* verify same number of options and help strings */
    assert((sizeof(appHelp)/sizeof(char *)) ==
           (sizeof(appOptions)/sizeof(struct option)));

    /* register the application */
    skAppRegister(argv[0]);
    skAppVerifyFeatures(&features, NULL);
    skOptionsSetUsageCallback(&appUsageLong);

    /* initialize globals */
    count_pairs = 0;

    /* register the options */
    if (skOptionsRegister(appOptions, &appOptionsHandler, NULL)
        || skOptionsNotesRegister(NULL)
        || sksiteCompmethodOptionsRegister(&comp_method)
        || sksiteOptionsRegister(SK_SITE_FLAG_CONFIG_FILE))
    {
        skAppPrintErr("Unable to register options");
        exit(EXIT_FAILURE);
    }

    /* register the teardown handler.  atexit() signals failure with
     * any non-zero value, not necessarily a negative one, so compare
     * against zero. */
    if (atexit(appTeardown) != 0) {
        skAppPrintErr("Unable to register appTeardown() with atexit()");
        appTeardown();
        exit(EXIT_FAILURE);
    }

    /* parse options; on success arg_index is the index of the first
     * positional argument */
    arg_index = skOptionsParse(argc, argv);
    if (arg_index < 0) {
        skAppUsage();             /* never returns */
    }

    /* try to load site config file; if it fails, we will not be able
     * to resolve flowtype and sensor from input file names */
    sksiteConfigure(0);

    /* verify that related pairs were given */
    if (count_pairs == 0) {
        skAppPrintErr("At least one --%s pair must be given",
                      appOptions[OPT_RELATE].name);
        skAppUsage();             /* never returns */
    }

    /* get the file arguments: first the query file */
    if (arg_index == argc) {
        skAppPrintErr("Missing QUERY_FILE argument");
        skAppUsage();             /* never returns */
    }
    if ((rv = skStreamCreate(&query_stream, SK_IO_READ, SK_CONTENT_SILK_FLOW))
        || (rv = skStreamBind(query_stream, argv[arg_index])))
    {
        skStreamPrintLastErr(query_stream, rv, &skAppPrintErr);
        exit(EXIT_FAILURE);
    }
    arg_index++;

    /* then the response file */
    if (arg_index == argc) {
        skAppPrintErr("Missing RESPONSE_FILE argument");
        skAppUsage();             /* never returns */
    }
    if ((rv = skStreamCreate(&response_stream,SK_IO_READ,SK_CONTENT_SILK_FLOW))
        || (rv = skStreamBind(response_stream, argv[arg_index])))
    {
        skStreamPrintLastErr(response_stream, rv, &skAppPrintErr);
        exit(EXIT_FAILURE);
    }
    arg_index++;

    /* and finally the output file */
    if (arg_index == argc) {
        skAppPrintErr("Missing OUTPUT_FILE argument");
        skAppUsage();             /* never returns */
    }
    if ((rv = skStreamCreate(&matched_stream,SK_IO_WRITE,SK_CONTENT_SILK_FLOW))
        || (rv = skStreamBind(matched_stream, argv[arg_index])))
    {
        skStreamPrintLastErr(matched_stream, rv, &skAppPrintErr);
        exit(EXIT_FAILURE);
    }
    arg_index++;

    /* check for extra options */
    if (arg_index != argc) {
        skAppPrintErr("Too many arguments or unrecognized switch '%s'",
                      argv[arg_index]);
        skAppUsage(); /* never returns */
    }

    /* Now, open the files.  Both inputs use the ASV4 IPv6 policy;
     * getField() reads addresses with the IPv4 accessors. */
    if ((rv = skStreamSetIPv6Policy(query_stream, SK_IPV6POLICY_ASV4))
        || (rv = skStreamOpen(query_stream))
        || (rv = skStreamReadSilkHeader(query_stream, NULL)))
    {
        skStreamPrintLastErr(query_stream, rv, &skAppPrintErr);
        skStreamDestroy(&query_stream);
        skAppPrintErr("Cannot open QUERY_FILE. Exiting.");
        exit(EXIT_FAILURE);
    }

    if ((rv = skStreamSetIPv6Policy(response_stream, SK_IPV6POLICY_ASV4))
        || (rv = skStreamOpen(response_stream))
        || (rv = skStreamReadSilkHeader(response_stream, NULL)))
    {
        skStreamPrintLastErr(response_stream, rv, &skAppPrintErr);
        skStreamDestroy(&response_stream);
        skAppPrintErr("Cannot open RESPONSE_FILE. Exiting.");
        exit(EXIT_FAILURE);
    }

    /* the output header gets the compression method, the command
     * line invocation, and any notes before the file is opened and
     * the header is written */
    if ((rv=skHeaderSetCompressionMethod(skStreamGetSilkHeader(matched_stream),
                                         comp_method))
        || (rv = skHeaderAddInvocation(skStreamGetSilkHeader(matched_stream),
                                       1, argc, argv))
        || (rv = skOptionsNotesAddToStream(matched_stream))
        || (rv = skStreamOpen(matched_stream))
        || (rv = skStreamWriteSilkHeader(matched_stream)))
    {
        skStreamPrintLastErr(matched_stream, rv, &skAppPrintErr);
        skStreamDestroy(&matched_stream);
        skAppPrintErr("Cannot open OUTPUT_FILE. Exiting.");
        exit(EXIT_FAILURE);
    }

    return;                     /* OK */
}


/*
 *  status = appOptionsHandler(cData, opt_index, opt_arg);
 *
 *    Callback registered with skOptionsRegister() and invoked by
 *    skOptionsParse() once for every application switch found on
 *    the command line.  'opt_index' is the appOptionsEnum value of
 *    the switch and 'opt_arg' is its argument, if it takes one;
 *    'cData' is ignored.
 *
 *    The switch is handled by updating the file-level settings.
 *    Returns 0 on success.  On failure an error is printed and 1 is
 *    returned; returning a non-zero from from the handler causes
 *    skOptionsParse() to return a negative value.
 */
static int appOptionsHandler(
    clientData  UNUSED(cData),
    int         opt_index,
    char       *opt_arg)
{
    /* set once any of the three delta-policy switches is seen */
    static int policy_chosen = 0;
    uint32_t *ids;
    uint32_t num_ids;
    double seconds;
    char which;
    int rv;

    switch ((appOptionsEnum)opt_index) {
      case OPT_RELATE:
        /* is there room for another pair? */
        if (count_pairs == RWMATE_REL_MAX) {
            skAppPrintErr(("Total number of relations specified"
                           " exceeds maximum (%d)"),
                          RWMATE_REL_MAX);
            return 1;
        }
        /* parse at most two field IDs in the range SIP..FLAGS */
        rv = skStringParseNumberList(&ids, &num_ids, opt_arg,
                                     RWMATE_FID_SIP, RWMATE_FID_FLAGS, 2);
        if (rv) {
            goto PARSE_ERROR;
        }
        assert(num_ids > 0 && num_ids <= 2);
        if (num_ids == 2) {
            /* first ID is for the query side, second for the response */
            match_pairs[count_pairs][RWMATE_QUERY] = ids[0];
            match_pairs[count_pairs][RWMATE_RESPONSE] = ids[1];
            count_pairs++;
            free(ids);
            break;
        }
        skAppPrintErr("Invalid %s '%s': Each relation requires two IDs",
                      appOptions[opt_index].name, opt_arg);
        free(ids);
        return 1;

      case OPT_TIME_DELTA:
        /* argument is in seconds; it is stored as milliseconds */
        rv = skStringParseDouble(&seconds, opt_arg, 0.001, INT32_MAX);
        if (rv) {
            goto PARSE_ERROR;
        }
        delta_msec = (sktime_t)(1000.0 * seconds);
        break;

      case OPT_SYMMETRIC_DELTA:
        symmetric_delta = 1;
        break;

      case OPT_ABSOLUTE_DELTA:
      case OPT_RELATIVE_DELTA:
      case OPT_INFINITE_DELTA:
        /* the three policies are mutually exclusive, and a repeated
         * switch is rejected as well */
        if (policy_chosen) {
            skAppPrintErr("May only specify one of --%s, --%s, or --%s",
                          appOptions[OPT_ABSOLUTE_DELTA].name,
                          appOptions[OPT_RELATIVE_DELTA].name,
                          appOptions[OPT_INFINITE_DELTA].name);
            return 1;
        }
        policy_chosen = 1;
        /* relies on the OPT_*_DELTA values being consecutive and in
         * the same order as delta_enum_t */
        delta_policy = (delta_enum_t)(opt_index - OPT_ABSOLUTE_DELTA);
        break;

      case OPT_UNMATCHED:
        /* the argument must be exactly one character long */
        if (opt_arg[0] == '\0' || opt_arg[1] != '\0') {
            skAppPrintErr(("Invalid %s '%s': "
                           "argument must be one of \"q,r,b\""),
                          appOptions[opt_index].name, opt_arg);
            return 1;
        }
        which = opt_arg[0];
        if (which == 'Q' || which == 'q') {
            write_unmatched_query = 1;
        } else if (which == 'R' || which == 'r') {
            write_unmatched_response = 1;
        } else if (which == 'B' || which == 'b') {
            write_unmatched_query = 1;
            write_unmatched_response = 1;
        } else {
            skAppPrintErr(("Invalid %s '%s': "
                           "argument must be one of \"qrb\""),
                          appOptions[opt_index].name, opt_arg);
            return 1;
        }
        break;
    }

    return 0; /* OK */

  PARSE_ERROR:
    /* shared report for the skStringParse*() failures above */
    skAppPrintErr("Invalid %s '%s': %s",
                  appOptions[opt_index].name, opt_arg,
                  skStringParseStrerror(rv));
    return 1;
}


/*
 *  value = getField(field_id, current_rec);
 *
 *    Return, as a 32-bit unsigned number, the value that the record
 *    'current_rec' holds for the field named by 'field_id'.  IP
 *    addresses are read with the IPv4 accessors.  A 'field_id' that
 *    is not one of the fields_enum_t values yields 0.
 */
static uint32_t getField(
    fields_enum_t   field_id,
    const rwRec    *current_rec)
{
    switch (field_id) {
      case RWMATE_FID_SIP:
        return rwRecGetSIPv4(current_rec);
      case RWMATE_FID_DIP:
        return rwRecGetDIPv4(current_rec);
      case RWMATE_FID_SPORT:
        return rwRecGetSPort(current_rec);
      case RWMATE_FID_DPORT:
        return rwRecGetDPort(current_rec);
      case RWMATE_FID_PROTO:
        return rwRecGetProto(current_rec);
      case RWMATE_FID_PKTS:
        return rwRecGetPkts(current_rec);
      case RWMATE_FID_BYTES:
        return rwRecGetBytes(current_rec);
      case RWMATE_FID_FLAGS:
        return rwRecGetFlags(current_rec);
    }

    /* unknown field */
    return 0;
}


/*
 *  cmp = compareFields(rec_1, type_1, rec_2, type_2);
 *
 *    Compare two records on the fields named by the --relate pairs.
 *    'type_1' and 'type_2' say whether 'rec_1' and 'rec_2' are to be
 *    treated as a query or as a response; that selects which column
 *    of the global match_pairs[] supplies the field ID for each
 *    record.
 *
 *    The pairs are visited in the order they were given, and the
 *    first pair whose two values differ decides the result: 1 if the
 *    value from 'rec_1' is the larger, -1 if it is the smaller.
 *    Returns 0 when the values agree for every pair.
 */
static int compareFields(
    const rwRec    *rec_1,
    match_rec_t     type_1,
    const rwRec    *rec_2,
    match_rec_t     type_2)
{
    unsigned int pair;
    uint32_t lhs;
    uint32_t rhs;

    assert(type_1 == RWMATE_QUERY || type_1 == RWMATE_RESPONSE);
    assert(type_2 == RWMATE_QUERY || type_2 == RWMATE_RESPONSE);

    for (pair = 0; pair < count_pairs; ++pair) {
        lhs = getField((fields_enum_t)match_pairs[pair][type_1], rec_1);
        rhs = getField((fields_enum_t)match_pairs[pair][type_2], rec_2);
        if (lhs != rhs) {
            return (lhs > rhs) ? 1 : -1;
        }
    }

    /* every related field is equal */
    return 0;
}


/*
 *  cmp = compareTimes(rec_1, rec_2);
 *
 *    Decide whether two records are close enough in time to match,
 *    using the globals 'delta_msec' and 'symmetric_delta'.
 *
 *    Returns 0 (match) when
 *
 *      - the two start times are identical; or
 *
 *      - 'rec_1' starts first and its end time is no earlier than
 *        <delta> milliseconds before the start of 'rec_2'; or
 *
 *      - the symmetric switch is on, 'rec_2' starts first, and its
 *        end time is no earlier than <delta> milliseconds before the
 *        start of 'rec_1'.
 *
 *    Otherwise the start times order the records: returns 1 if
 *    'rec_1' starts later than 'rec_2' and -1 if it starts earlier.
 */
static int compareTimes(
    const rwRec    *rec_1,
    const rwRec    *rec_2)
{
    if (rwRecGetStartTime(rec_1) == rwRecGetStartTime(rec_2)) {
        /* same start; symmetry does not matter */
        return 0;
    }

    if (rwRecGetStartTime(rec_2) > rwRecGetStartTime(rec_1)) {
        /* rec_1 is the earlier record.  This ordering may match
         * whether or not the symmetric switch is on. */
        if (rwRecGetEndTime(rec_1)
            >= (rwRecGetStartTime(rec_2) - delta_msec))
        {
            return 0;
        }
        return -1;
    }

    /* rec_2 is the earlier record.  This ordering may only match
     * when the symmetric switch is on. */
    if (symmetric_delta
        && (rwRecGetEndTime(rec_2)
            >= (rwRecGetStartTime(rec_1) - delta_msec)))
    {
        return 0;
    }
    return 1;
}


/*  dir = guessQueryDirection(query_rec, response_rec)
 *
 *    Guess which record of the matched pair 'query_rec' and
 *    'response_rec' started the conversation, and return RWMATE_QUERY
 *    or RWMATE_RESPONSE accordingly.
 *
 *    The record with the earlier start time is the leader.  When the
 *    start times are equal a heuristic is used:
 *
 *    If the protocol of the query record is TCP (6) or UDP (17) and
 *    exactly one of the query record's ports is in the range 0-1023,
 *    the conversation is taken to be directed at the low port: a low
 *    destination port gives RWMATE_QUERY, a low source port gives
 *    RWMATE_RESPONSE.
 *
 *    This could be supplemented by using a set to define a set of
 *    service ports and using them in the same way.  Other techniques
 *    could be based on, e.g. first flags for TCP, etc.
 *
 *    In every other case the query side is favored and RWMATE_QUERY
 *    is returned.
 */
static match_rec_t guessQueryDirection(
    const rwRec* query_rec,
    const rwRec* response_rec)
{
    uint32_t proto;
    int sport_is_low;
    int dport_is_low;

    /* different start times: the earlier record leads */
    if (rwRecGetStartTime(query_rec) != rwRecGetStartTime(response_rec)) {
        return ((rwRecGetStartTime(query_rec)
                 < rwRecGetStartTime(response_rec))
                ? RWMATE_QUERY
                : RWMATE_RESPONSE);
    }

    /* equal start times: the port heuristic applies to TCP and UDP
     * only */
    proto = rwRecGetProto(query_rec);
    if (proto != 6 && proto != 17) {
        return RWMATE_QUERY;
    }

    sport_is_low = (rwRecGetSPort(query_rec) < 1024);
    dport_is_low = (rwRecGetDPort(query_rec) < 1024);

    /* RWMATE_RESPONSE is chosen only when the query record goes from
     * a low port to a high port; a high-to-low query and the
     * undecided cases (both ports low or both high) all give
     * RWMATE_QUERY, which is also the default */
    if (sport_is_low && !dport_is_low) {
        return RWMATE_RESPONSE;
    }
    return RWMATE_QUERY;
}


/*  status = main(argc, argv)
 *
 *    Walks 'query_stream' and 'response_stream' in step, one current
 *    record from each, and writes records to 'matched_stream'.
 *
 *    While both streams have a current record, the two records are
 *    compared with compareFields() and, when the fields agree, with
 *    compareTimes().  A negative result discards the query record, a
 *    positive result discards the response record, and zero starts a
 *    match group.
 *
 *    The next-hop IPv4 field of every written record is overwritten
 *    as a tag:
 *
 *      unmatched query      0
 *      unmatched response   0xFF000000
 *      matched query        match_id
 *      matched response     0xFF000000 | match_id
 *
 *    Unmatched records are written only when 'write_unmatched_query'
 *    or 'write_unmatched_response' is set.
 *
 *    The process exits with EXIT_FAILURE when the first read from
 *    either input fails for a reason other than EOF, or when a write
 *    returns a value for which SKSTREAM_ERROR_IS_FATAL() is true.
 *    Read errors after the first record are printed and then treated
 *    like end-of-input.  Otherwise the function returns 0.
 */
int main(int argc, char **argv)
{
    rwRec query_rec;            /* current record from query_stream */
    rwRec response_rec;         /* current record from response_stream */
    rwRec base_rec;             /* copy of the record that leads a match */
    uint32_t match_id = 0;      /* incremented once per match group */
    int have_query = 1;         /* 0 once query_stream stops yielding */
    int have_response = 1;      /* 0 once response_stream stops yielding */
    int have_match_query;       /* query_rec belongs to current match */
    int have_match_response;    /* response_rec belongs to current match */
    int rv;
    match_rec_t base_type;      /* which side base_rec was copied from */
    match_rec_t match_lead;     /* which side to write next in a match */
    sktime_t base_etime;        /* end time that bounds the current match */

    appSetup(argc, argv); /* never returns on error */

    /*
     * The revised version of this application requires
     * sorted data that matches the mating.  So this means
     * that we're going to have records sorted slightly
     * differently (if we match 2/1, we sort --field=2,9 and
     * --field=1,9) on the two applications.
     *
     * The loop always begins with a reference input record and an
     * output record.  As long as we have both, we continue to loop,
     * flushing the "earlier" or "later" records and processing
     * matches as we find them.
     */

    /* Prime the loop with the first record from each stream.  An
     * empty input is not an error; any other read failure is fatal
     * here. */
    rv = skStreamReadRecord(query_stream, &query_rec);
    if (rv != 0) {
        if (rv == SKSTREAM_ERR_EOF) {
            /* QUERY_FILE is empty */
            have_query = 0;
        } else {
            skStreamPrintLastErr(query_stream, rv, &skAppPrintErr);
            exit(EXIT_FAILURE);
        }
    }
    rv = skStreamReadRecord(response_stream, &response_rec);
    if (rv != 0) {
        if (rv == SKSTREAM_ERR_EOF) {
            /* RESPONSE_FILE is empty */
            have_response = 0;
        } else {
            skStreamPrintLastErr(response_stream, rv, &skAppPrintErr);
            exit(EXIT_FAILURE);
        }
    }

    while (have_query && have_response) {
        /* Must try match on fields first because of sort order */
        rv = compareFields(&query_rec, RWMATE_QUERY,
                           &response_rec, RWMATE_RESPONSE);
        if (rv == 0) {
            /* fields agree; let the times decide */
            rv = compareTimes(&query_rec, &response_rec);
        }

        /* NOTE: 'rv' holds the comparison result when the branch
         * below is selected, but is reused for stream return codes
         * inside each branch. */

        if (rv < 0) {
            /* QUERY is too early; read next query */

            /* if we are including unmatched queries, write the
             * record, after clearing the NHIP field to indicate lack
             * of match */
            if (write_unmatched_query) {
                rwRecSetNhIPv4(&query_rec, 0);
                rv = skStreamWriteRecord(matched_stream, &query_rec);
                /* NOTE(review): a non-zero 'rv' that is not flagged
                 * as fatal is not reported */
                if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                    skStreamPrintLastErr(matched_stream, rv, &skAppPrintErr);
                    exit(EXIT_FAILURE);
                }
            }
            rv = skStreamReadRecord(query_stream, &query_rec);
            if (rv) {
                /* any read failure ends the query side; only a
                 * non-EOF failure is reported, and unlike the first
                 * read it does not exit */
                have_query = 0;
                if (SKSTREAM_ERR_EOF != rv) {
                    skStreamPrintLastErr(query_stream, rv, &skAppPrintErr);
                }
            }

        } else if (rv > 0) {
            /* RESPONSE is too early; read next response */

            /* if we are including unmatched responses, write them out
             * now, after setting the NHIP field to indicate lack of
             * match */
            if (write_unmatched_response) {
                rwRecSetNhIPv4(&response_rec, 0xFF000000);
                rv = skStreamWriteRecord(matched_stream, &response_rec);
                if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                    skStreamPrintLastErr(matched_stream, rv, &skAppPrintErr);
                    exit(EXIT_FAILURE);
                }
            }
            rv = skStreamReadRecord(response_stream, &response_rec);
            if (rv) {
                /* any read failure ends the response side; only a
                 * non-EOF failure is reported */
                have_response = 0;
                if (SKSTREAM_ERR_EOF != rv) {
                    skStreamPrintLastErr(response_stream, rv, &skAppPrintErr);
                }
            }

        } else {
            /* RECORDS MATCH.  Determine the direction of the match
             * and keep the first record as the match base. */

            /* NOTE(review): match_id is later OR'ed with 0xFF000000
             * for responses and written unmodified for queries;
             * nothing here guards against it growing past
             * 0x00FFFFFF, at which point a query's tag would overlap
             * the response marker byte. */
            ++match_id;
            have_match_query = 1;
            have_match_response = 1;
            base_type = guessQueryDirection(&query_rec, &response_rec);
            if (base_type == RWMATE_QUERY) {
                RWREC_COPY(&base_rec, &query_rec);
            } else {
                RWREC_COPY(&base_rec, &response_rec);
            }
            /* later records are tested against this end time; it may
             * be extended below when delta_policy is RELATIVE_DELTA */
            base_etime = rwRecGetEndTime(&base_rec);

            /* sanity check and debugging */
            assert((base_type == RWMATE_QUERY)
                   || (base_type == RWMATE_RESPONSE));
            PRINTDEBUG((stderr, "M %d %s\n", match_id,
                        (base_type == RWMATE_QUERY ? "RWM_Q" : "RWM_R")));

            /* Now we have the base and we have both match sides. We
             * loop as long as we have at least one record that
             * matches the base */
            do {
                /* Need to decide which to process */
                PRINTDEBUG((stderr, "(%c%c) ",
                            (have_match_query ? 'Q' : ' '),
                            (have_match_response ? 'R' : ' ')));
                if (have_match_query && have_match_response) {
                    /* use earlier record as the match_lead */
                    if (rwRecGetStartTime(&query_rec)
                        < rwRecGetStartTime(&response_rec))
                    {
                        match_lead = RWMATE_QUERY;
                    } else if (rwRecGetStartTime(&query_rec)
                               > rwRecGetStartTime(&response_rec))
                    {
                        match_lead = RWMATE_RESPONSE;
                    } else {
                        /* default for time tie */
                        match_lead = base_type;
                    }
                } else if (have_match_query) {
                    match_lead = RWMATE_QUERY;
                } else {
                    /* the loop condition guarantees at least one
                     * side is still in the match */
                    match_lead = RWMATE_RESPONSE;
                }

                if (match_lead == RWMATE_QUERY) {
                    /* write the query and read a new one, testing for
                     * a match */
                    PRINTDEBUG((stderr, "  Q "));
                    rwRecSetNhIPv4(&query_rec, match_id);
                    rv = skStreamWriteRecord(matched_stream, &query_rec);
                    if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                        skStreamPrintLastErr(matched_stream, rv,
                                             &skAppPrintErr);
                        exit(EXIT_FAILURE);
                    }
                    /* assume the next query is outside the match
                     * until the tests below say otherwise */
                    have_match_query = 0;

                    rv = skStreamReadRecord(query_stream, &query_rec);
                    if (rv) {
                        /* EOF or read error on query - No more match
                         * pairs.  The 'continue' jumps to the
                         * do/while test, so a response that is still
                         * part of this match is written before the
                         * outer loop ends. */
                        have_query = 0;
                        PRINTDEBUG((stderr, " eof\n"));
                        if (SKSTREAM_ERR_EOF != rv) {
                            skStreamPrintLastErr(query_stream, rv,
                                                 &skAppPrintErr);
                        }
                        continue;
                    }
                    if (0 != compareFields(&base_rec, base_type,
                                           &query_rec, RWMATE_QUERY))
                    {
                        /* No match on fields - No more match pairs */
                        PRINTDEBUG((stderr, " nF\n"));
                        continue;
                    }
                    /* fields agree with the base; with
                     * INFINITE_DELTA no time test is applied,
                     * otherwise the record must start less than
                     * delta_msec after base_etime */
                    if ((delta_policy == INFINITE_DELTA)
                        || ((rwRecGetStartTime(&query_rec) - delta_msec)
                            < base_etime))
                    {
                        /* we have a match on fields and times */
                        PRINTDEBUG((stderr, " FT\n"));
                        have_match_query = 1;

                        if ((delta_policy == RELATIVE_DELTA)
                            && (base_etime < rwRecGetEndTime(&query_rec)))
                        {
                            /* update base end time */
                            base_etime = rwRecGetEndTime(&query_rec);
                        }
                    }
                    /* when the time test fails, have_match_query
                     * stays 0 and query_rec is left as the current
                     * record for the outer loop */
                } else {
                    assert(match_lead == RWMATE_RESPONSE);
                    /* write the response and read a new one, testing
                     * for a match */
                    PRINTDEBUG((stderr, "  R "));
                    rwRecSetNhIPv4(&response_rec, 0xFF000000 | match_id);
                    rv = skStreamWriteRecord(matched_stream, &response_rec);
                    if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                        skStreamPrintLastErr(matched_stream, rv,
                                             &skAppPrintErr);
                        exit(EXIT_FAILURE);
                    }
                    /* assume the next response is outside the match
                     * until the tests below say otherwise */
                    have_match_response = 0;

                    rv = skStreamReadRecord(response_stream, &response_rec);
                    if (rv) {
                        /* EOF or read error on response - No more
                         * match pairs.  As above, 'continue' goes to
                         * the do/while test. */
                        have_response = 0;
                        PRINTDEBUG((stderr, " eof\n"));
                        if (SKSTREAM_ERR_EOF != rv) {
                            skStreamPrintLastErr(response_stream, rv,
                                                 &skAppPrintErr);
                        }
                        continue;
                    }
                    if (0 != compareFields(&base_rec, base_type,
                                           &response_rec, RWMATE_RESPONSE))
                    {
                        /* No match on fields - No more match pairs */
                        PRINTDEBUG((stderr, " nF\n"));
                        continue;
                    }
                    /* same time test as on the query side */
                    if ((delta_policy == INFINITE_DELTA)
                        || ((rwRecGetStartTime(&response_rec) - delta_msec)
                            < base_etime))
                    {
                        /* we have a match on fields and times */
                        PRINTDEBUG((stderr, " FT\n"));
                        have_match_response = 1;

                        if ((delta_policy == RELATIVE_DELTA)
                            && (base_etime < rwRecGetEndTime(&response_rec)))
                        {
                            /* update base end time */
                            base_etime = rwRecGetEndTime(&response_rec);
                        }
                    }
                }
            } while (have_match_query || have_match_response);
        }
    }

    /* write the remaining unmatched records */

    /* At most one of the two blocks below can run: the loop above
     * only exits once have_query or have_response is 0.  When a flag
     * is still set, the corresponding record has been read but not
     * yet written, hence the write-then-read do/while. */
    if (write_unmatched_query && have_query) {
        do {
            rwRecSetNhIPv4(&query_rec, 0);
            rv = skStreamWriteRecord(matched_stream, &query_rec);
            if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                skStreamPrintLastErr(matched_stream, rv, &skAppPrintErr);
                exit(EXIT_FAILURE);
            }
        } while ((rv = skStreamReadRecord(query_stream, &query_rec))
                 == SKSTREAM_OK);
        if (SKSTREAM_ERR_EOF != rv) {
            skStreamPrintLastErr(query_stream, rv, &skAppPrintErr);
        }
    }

    if (write_unmatched_response && have_response) {
        do {
            rwRecSetNhIPv4(&response_rec, 0xFF000000);
            rv = skStreamWriteRecord(matched_stream, &response_rec);
            if (SKSTREAM_ERROR_IS_FATAL(rv)) {
                skStreamPrintLastErr(matched_stream, rv, &skAppPrintErr);
                exit(EXIT_FAILURE);
            }
        } while ((rv = skStreamReadRecord(response_stream, &response_rec))
                 == SKSTREAM_OK);
        if (SKSTREAM_ERR_EOF != rv) {
            skStreamPrintLastErr(response_stream, rv, &skAppPrintErr);
        }
    }

    /* NOTE(review): the result of destroying the output stream is
     * not examined here */
    if (matched_stream) {
        skStreamDestroy(&matched_stream);
    }

    /* done */
    appTeardown();

    return 0;
}


/*
** Local Variables:
** mode:c
** indent-tabs-mode:nil
** c-basic-offset:4
** End:
*/
