/*
** Copyright (C) 2001-2012 by Carnegie Mellon University.
**
** @OPENSOURCE_HEADER_START@
**
** Use of the SILK system and related source code is subject to the terms
** of the following licenses:
**
** GNU Public License (GPL) Rights pursuant to Version 2, June 1991
** Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013
**
** NO WARRANTY
**
** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER
** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY
** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN
** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY
** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT
** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE,
** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE
** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT,
** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY
** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF
** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES.
** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF
** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON
** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE
** DELIVERABLES UNDER THIS LICENSE.
**
** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie
** Mellon University, its trustees, officers, employees, and agents from
** all claims or demands made against them (and any related losses,
** expenses, or attorney's fees) arising out of, or relating to Licensee's
** and/or its sub licensees' negligent use or willful misuse of or
** negligent conduct or willful misconduct regarding the Software,
** facilities, or other rights or assistance granted by Carnegie Mellon
** University under this License, including, but not limited to, any
** claims of product liability, personal injury, death, damage to
** property, or violation of any laws or regulations.
**
** Carnegie Mellon University Software Engineering Institute authored
** documents are sponsored by the U.S. Department of Defense under
** Contract FA8721-05-C-0003. Carnegie Mellon University retains
** copyrights in all material produced under this contract. The U.S.
** Government retains a non-exclusive, royalty-free license to publish or
** reproduce these documents, or allow others to do so, for U.S.
** Government purposes only pursuant to the copyright license under the
** contract clause at 252.227.7013.
**
** @OPENSOURCE_HEADER_END@
*/
#ifndef _RWSKUNIQUE_H
#define _RWSKUNIQUE_H 1

#include <silk/silk.h>

RCSIDENTVAR(rcsID_SKUNIQUE_H, "$SiLK: skunique.h 372a8bc31d8a 2012-02-10 21:55:28Z mthomas $");

#include <silk/skstream.h>
#include <silk/rwrec.h>

/*
**  skunique.h
**
**    An attempt to make the bulk of rwuniq into a stand-alone
**    library.
**
*/



/* LOCAL DEFINES AND TYPEDEFS */

/*
 *  PRINTDEBUG((stream, fmt, ...));
 *
 *    Debug-print helper.  The single macro argument is a complete,
 *    parenthesized fprintf() argument list, so every invocation uses
 *    double parentheses:
 *        PRINTDEBUG((stderr, "My output\n"));
 *
 *    When ENABLE_PRINTDEBUG is defined, the macro expands to an
 *    fprintf() call.  Otherwise, unless PRINTDEBUG has already been
 *    defined, it expands to nothing and its argument is discarded
 *    without being evaluated.
 */
#if defined(ENABLE_PRINTDEBUG)
#  define PRINTDEBUG(args) fprintf args
#elif !defined(PRINTDEBUG)
#  define PRINTDEBUG(args)
#endif

/*
 *  SKUNIQ_USE_MEMCPY
 *
 *    Defaults to 1 when SK_HAVE_ALIGNED_ACCESS_REQUIRED is defined and
 *    to 0 otherwise.  A value defined before this header is included
 *    is left untouched.
 *
 *    NOTE(review): presumably selects memcpy()-based access to field
 *    buffers on platforms that require aligned access -- confirm
 *    against the code that tests this macro.
 */
#if !defined(SKUNIQ_USE_MEMCPY)
#  if defined(SK_HAVE_ALIGNED_ACCESS_REQUIRED)
#    define SKUNIQ_USE_MEMCPY 1
#  else
#    define SKUNIQ_USE_MEMCPY 0
#  endif
#endif




/*
 *  Identifiers for the fields known to this library.  The numeric
 *  value of every enumerator is spelled out so that the
 *  correspondence with rwascii.h (values 0 through 24) is visible at
 *  a glance and cannot shift silently if an entry is inserted.
 */
typedef enum sk_fieldid_en {
    /* values 0..24 correspond to values in rwascii.h */
    SK_FIELD_SIPv4          =  0,
    SK_FIELD_DIPv4          =  1,
    SK_FIELD_SPORT          =  2,
    SK_FIELD_DPORT          =  3,
    SK_FIELD_PROTO          =  4,
    SK_FIELD_PACKETS        =  5,
    SK_FIELD_BYTES          =  6,
    SK_FIELD_FLAGS          =  7,
    SK_FIELD_STARTTIME      =  8,
    SK_FIELD_ELAPSED        =  9,
    SK_FIELD_ENDTIME        = 10,
    SK_FIELD_SID            = 11,
    SK_FIELD_INPUT          = 12,
    SK_FIELD_OUTPUT         = 13,
    SK_FIELD_NHIPv4         = 14,
    SK_FIELD_INIT_FLAGS     = 15,
    SK_FIELD_REST_FLAGS     = 16,
    SK_FIELD_TCP_STATE      = 17,
    SK_FIELD_APPLICATION    = 18,
    SK_FIELD_FTYPE_CLASS    = 19,
    SK_FIELD_FTYPE_TYPE     = 20,
    SK_FIELD_STARTTIME_MSEC = 21,
    SK_FIELD_ENDTIME_MSEC   = 22,
    SK_FIELD_ELAPSED_MSEC   = 23,
    SK_FIELD_ICMP_TYPE_CODE = 24,
    /* end of the values that correspond to rwascii.h */

    /* values 25..34 have no counterpart in rwascii.h */
    SK_FIELD_SIPv6          = 25,
    SK_FIELD_DIPv6          = 26,
    SK_FIELD_NHIPv6         = 27,
    SK_FIELD_RECORDS        = 28,
    SK_FIELD_SUM_PACKETS    = 29,
    SK_FIELD_SUM_BYTES      = 30,
    SK_FIELD_SUM_ELAPSED    = 31,
    SK_FIELD_MIN_STARTTIME  = 32,
    SK_FIELD_MAX_ENDTIME    = 33,
    SK_FIELD_CALLER         = 34

} sk_fieldid_t;



/* Holds a list of fields.  The struct body is not defined in this
 * header; objects are created with skFieldListCreate() and released
 * with skFieldListDestroy(). */
typedef struct sk_fieldlist_st sk_fieldlist_t;

/* Holds a single field.  The struct body is not defined in this
 * header; pointers of this type are returned by skFieldListAddField(),
 * skFieldListAddKnownField(), and skFieldListIteratorNext(). */
typedef struct sk_fieldentry_st sk_fieldentry_t;

/* Iterator over a field list.  Unlike the two types above, the struct
 * body is defined further down in this header. */
typedef struct sk_fieldlist_iterator_st sk_fieldlist_iterator_t;


/*
 *  cmp = skFieldCompareMemcmp(a, b, &len);
 *
 *    Comparison function the caller may use.  This function performs
 *    memcmp() on the values 'a' and 'b'.  The number of bytes to
 *    compare in 'a' and 'b' should be passed in as the context: the
 *    third argument is a pointer to that length.
 *
 *    NOTE(review): the integer type that 'len' must point to is not
 *    visible in this header -- confirm against the implementation.
 */
int skFieldCompareMemcmp(const void *a, const void *b, void *len);


/*
 *  cmp = skFieldCompareUintXX(a, b, ctx);
 *
 *    Comparison functions the caller may use.  These functions
 *    compare 'a' and 'b' as containing unsigned uintXX_t's in native
 *    byte order.  The context 'ctx' is ignored.
 */
int skFieldCompareUint8(const void *a, const void *b, void *ctx);
int skFieldCompareUint16(const void *a, const void *b, void *ctx);
int skFieldCompareUint32(const void *a, const void *b, void *ctx);
int skFieldCompareUint64(const void *a, const void *b, void *ctx);


/*
 *  skFieldMergeUintXX(a, b, ctx);
 *
 *    Merging functions the caller may use.  These functions treat 'a'
 *    and 'b' as containing unsigned uintXX_t's in native byte order.
 *    The two values are summed and the result is put into 'a'; 'b' is
 *    not modified.
 *
 *    NOTE(review): 'ctx' is presumably ignored, as it is by the
 *    skFieldCompareUintXX() functions, and behavior when the sum
 *    exceeds the range of uintXX_t is not documented here -- confirm
 *    both against the implementation.
 */
void skFieldMergeUint8(void *a, const void *b, void *ctx);
void skFieldMergeUint16(void *a, const void *b, void *ctx);
void skFieldMergeUint32(void *a, const void *b, void *ctx);
void skFieldMergeUint64(void *a, const void *b, void *ctx);



/* callback for adding/converting a record to a binary value: reads
 * the record 'rec' and writes to (or updates) the bytes at 'dest'.
 * 'ctx' is a caller-supplied context pointer.
 *
 * -- if we want this to work with the existing plug-in API, do we
 *    need to include the "extra" value here?
 */
typedef void (*sk_fieldlist_rec_to_bin_fn_t)(
    const rwRec        *rec,
    uint8_t            *dest,
    void               *ctx);

/* callback for comparing two binary values, 'bin1' and 'bin2'.
 * NOTE(review): presumably returns a value less than, equal to, or
 * greater than zero in the manner of memcmp() -- confirm. */
typedef int (*sk_fieldlist_bin_cmp_fn_t)(
    const uint8_t      *bin1,
    const uint8_t      *bin2,
    void               *ctx);

/* callback for initializing the binary value at 'bin' */
typedef void (*sk_fieldlist_bin_init_fn_t)(
    uint8_t            *bin,
    void               *ctx);

/* callback for merging two binary values--used for aggregate values
 * (and maybe distinct fields?).  'bin1' is writable and 'bin2' is
 * read-only, so the merged result goes into 'bin1'. */
typedef void (*sk_fieldlist_bin_merge_fn_t)(
    uint8_t            *bin1,
    const uint8_t      *bin2,
    void               *ctx);

/* callback for producing output from the binary value at 'bin'; see
 * skFieldListOutputBuffer(), which calls the output callback for each
 * field */
typedef void (*sk_fieldlist_output_fn_t)(
    const uint8_t      *bin,
    void               *ctx);

/* UGH!  rwstats goes through the table twice: once to determine
 * thresholds and then once to print the results.  When dealing with
 * presorted data, rwstats knows it can only process the data once.
 *
 * This makes me wonder whether there shouldn't be a completely
 * separate API for handling presorted input...
 */

/* function to output a row from the unique object.  It receives the
 * buffers holding the row's key fields, distinct fields, and value
 * fields, plus the 'callback_data' pointer the caller registered
 * together with the function.
 *
 * Design note: there should be a return value from this function that
 * indicates no more output is to be generated.
 * NOTE(review): the meaning of the 'int' return value is not spelled
 * out in this header -- confirm against the implementation. */
typedef int (*sk_unique_output_fn_t)(
    const uint8_t                  *key_fields_buffer,
    const uint8_t                  *distinct_fields_buffer,
    const uint8_t                  *value_fields_buffer,
    void                           *callback_data);

/* structure holding pointers to the field-list callback functions,
 * together with the field's initial value and size.  A pointer to
 * this structure is passed to skFieldListAddField() to describe a
 * caller-defined field.
 *
 * NOTE(review): which members may be NULL is not documented in this
 * header -- confirm against the implementation. */
typedef struct sk_fieldlist_entrydata_st {
    /* converts a record to this field's binary value; presumably the
     * callback used by skFieldListRecToBinary() -- confirm */
    sk_fieldlist_rec_to_bin_fn_t    rec_to_bin;
    /* compares two binary values of this field; presumably the
     * callback used by skFieldListCompareBuffers() -- confirm */
    sk_fieldlist_bin_cmp_fn_t       bin_compare;
    /* adds a record's value to an existing binary value; presumably
     * the callback used by skFieldListAddRecToBuffer() -- confirm */
    sk_fieldlist_rec_to_bin_fn_t    add_rec_to_bin;
    /* merges two binary values of this field; presumably the callback
     * used by skFieldListMergeBuffers() -- confirm */
    sk_fieldlist_bin_merge_fn_t     bin_merge;
    /* outputs a binary value of this field; presumably the callback
     * used by skFieldListOutputBuffer() -- confirm */
    sk_fieldlist_output_fn_t        bin_output;
    /* bytes holding the field's initial value; presumably what
     * skFieldListInitializeBuffer() copies into place -- confirm */
    const uint8_t                  *initial_value;
    /* presumably the number of octets this field's binary value
     * occupies (cf. skFieldListEntryGetBinOctets()) -- confirm */
    size_t                          bin_octets;
} sk_fieldlist_entrydata_t;

/* create a new field list; the new list is handed back through
 * 'field_list'.
 * NOTE(review): the return-value convention is not documented here;
 * presumably 0 on success and non-zero on failure -- confirm. */
int skFieldListCreate(
    sk_fieldlist_t                **field_list);

/* destroy a field list.  Takes the address of the caller's list
 * pointer.
 * NOTE(review): presumably sets '*field_list' to NULL and tolerates a
 * NULL argument, like skUniqueDestroy() -- confirm. */
void skFieldListDestroy(
    sk_fieldlist_t                **field_list);

/* add a field to a field list.  The field is described by the
 * callbacks and sizes in 'regdata'; 'ctx' is a context pointer
 * associated with the field (cf. skFieldListEntryGetContext()).
 * Returns a pointer to the field entry.
 * NOTE(review): presumably returns NULL on failure, and whether
 * 'regdata' is copied or must outlive the list is not documented
 * here -- confirm both. */
sk_fieldentry_t *skFieldListAddField(
    sk_fieldlist_t                 *field_list,
    const sk_fieldlist_entrydata_t *regdata,
    void                           *ctx);

/* add a field known to the library to a field list.
 * NOTE(review): 'field_id' is presumably one of the sk_fieldid_t
 * values, and the function presumably returns NULL on failure --
 * confirm. */
sk_fieldentry_t *skFieldListAddKnownField(
    sk_fieldlist_t                 *field_list,
    int                             field_id,
    void                           *ctx);


/* returns the context pointer of 'field'; presumably the 'ctx' value
 * given when the field was added to its list -- confirm */
void *skFieldListEntryGetContext(const sk_fieldentry_t *field);

/* returns the identifier of 'field'; presumably an sk_fieldid_t
 * value -- confirm */
uint32_t skFieldListEntryGetId(const sk_fieldentry_t *field);

/* returns the number of octets in the binary value of 'field' */
size_t skFieldListEntryGetBinOctets(const sk_fieldentry_t *field);

/* returns the number of fields in 'field_list' */
size_t skFieldListGetFieldCount(
    const sk_fieldlist_t           *field_list);

/* returns (binary) size of all fields.  NOTE(review): presumably the
 * size a caller must allocate for an 'all_fields_buffer' -- confirm. */
size_t skFieldListGetBufferSize(
    const sk_fieldlist_t           *field_list);

/*
 * The functions below operate on an 'all_fields_buffer': a byte
 * buffer holding the binary values of all fields in a field list.
 * NOTE(review): the buffer is presumably expected to be at least
 * skFieldListGetBufferSize() bytes long -- confirm.
 */

/* sets 'all_fields_buffer' to the initial value for each field in the
 * field list. */
void skFieldListInitializeBuffer(
    const sk_fieldlist_t           *field_list,
    uint8_t                        *all_fields_buffer);

/* merges (e.g., add) two buffers for a field list.  The first buffer
 * is writable and the second is read-only, so the merged result goes
 * into 'all_fields_buffer1'. */
void skFieldListMergeBuffers(
    const sk_fieldlist_t           *field_list,
    uint8_t                        *all_fields_buffer1,
    const uint8_t                  *all_fields_buffer2);

/* compare two field buffers, return -1, 0, 1, if 'all_fields_buffer1'
 * is <, ==, > 'all_fields_buffer2' */
int skFieldListCompareBuffers(
    const sk_fieldlist_t           *field_list,
    const uint8_t                  *all_fields_buffer1,
    const uint8_t                  *all_fields_buffer2);

/* gets the binary value of 'rwrec' for each field in 'field_list' and
 * sets that value in 'all_fields_buffer' */
void skFieldListRecToBinary(
    const sk_fieldlist_t           *field_list,
    const rwRec                    *rwrec,
    uint8_t                        *all_fields_buffer);

/* adds the binary value of 'rwrec' for each field in 'field_list' to
 * the values already in 'all_fields_buffer' */
void skFieldListAddRecToBuffer(
    const sk_fieldlist_t           *field_list,
    const rwRec                    *rwrec,
    uint8_t                        *all_fields_buffer);

/* calls the output callback for each field */
void skFieldListOutputBuffer(
    const sk_fieldlist_t           *field_list,
    const uint8_t                  *all_fields_buffer);


/* Do we still need the field iterators (as public)? */

/* Definition of the iterator type that was forward-declared above as
 *     typedef struct sk_fieldlist_iterator_st sk_fieldlist_iterator_t;
 * Initialize an iterator with skFieldListIteratorBind(). */
struct sk_fieldlist_iterator_st {
    /* the field list being iterated over */
    const sk_fieldlist_t   *field_list;
    /* position within 'field_list'; presumably the index of the next
     * field that skFieldListIteratorNext() returns -- confirm */
    size_t                  field_idx;
};


/* bind an iterator to a field list.  'iter' is supplied by the
 * caller; this function does not return a new iterator. */
void skFieldListIteratorBind(
    const sk_fieldlist_t           *field_list,
    sk_fieldlist_iterator_t        *iter);

/* reset the fieldlist iterator.  NOTE(review): presumably so that
 * iteration starts again from the first field -- confirm. */
void skFieldListIteratorReset(
    sk_fieldlist_iterator_t        *iter);

/* return the next field entry from the iterator.  NOTE(review):
 * presumably returns NULL once all fields have been visited --
 * confirm. */
sk_fieldentry_t *skFieldListIteratorNext(
    sk_fieldlist_iterator_t        *iter);

/* copy the value associated with 'field_entry' from
 * 'all_fields_buffer' and into 'one_field_buf'.  NOTE(review):
 * 'one_field_buf' presumably must hold at least
 * skFieldListEntryGetBinOctets(field_entry) bytes -- confirm. */
void skFieldListExtractFromBuffer(
    const sk_fieldlist_t           *field_list,
    const uint8_t                  *all_fields_buffer,
    sk_fieldentry_t                *field_entry,
    uint8_t                        *one_field_buf);



/* ************************************************************ */

/* structure for binning records.  The struct body is not defined in
 * this header; objects are created with skUniqueCreate() and released
 * with skUniqueDestroy(). */
typedef struct sk_unique_st sk_unique_t;

/* iterates over the bins after records have been added.  The struct
 * body is defined further down in this header; iterators are created
 * with skUniqueIteratorCreate(). */
typedef struct sk_unique_iterator_st sk_unique_iterator_t;



/*
 *  ok = skUniqueCreate(&uniq);
 *
 *    Creates a new unique object and stores its address in the
 *    parameter 'uniq'.  Returns -1 if the unique object cannot be
 *    created.
 *
 *    NOTE(review): presumably returns 0 on success -- confirm.
 */
int skUniqueCreate(
    sk_unique_t                   **uniq);


/*
 *  skUniqueDestroy(&uniq);
 *
 *    Destroys the unique object whose address is specified in 'uniq'.
 *    Removes any temporary files.  This function may be called with a
 *    NULL pointer.
 *
 *    NOTE(review): whether '*uniq' is set to NULL afterwards is not
 *    stated here -- confirm against the implementation.
 */
void skUniqueDestroy(
    sk_unique_t                   **uniq);


/*
 *  ok = skUniqueSetSortedOutput(uniq);
 *
 *    Specifies that the unique object 'uniq' should present its
 *    results in sorted order.  This function must be called before
 *    calling skUniquePrepareForInput(); otherwise, the function
 *    returns -1.
 *
 *    NOTE(review): presumably returns 0 on success -- confirm.
 */
int skUniqueSetSortedOutput(
    sk_unique_t                    *uniq);


/*
 *  skUniqueSetSortedInput(uniq, output_fn, callback_data);
 *
 *    UNIMPLEMENTED
 *
 *    Specifies that the unique object 'uniq' will be fed records that
 *    have been pre-sorted by the key.  Must be called before
 *    skUniquePrepareForInput().
 *
 *    Design history:
 *
 *    One big issue with this: currently when processing sorted data,
 *    we can print/output rows as we read them.  If we now have
 *    separate "read-everything" then "output-everything" steps, we
 *    lose a lot of the advantages of processing sorted data.
 *
 *    With the V2 version of this API, this is no longer an issue
 *    since we have an output callback function.
 *
 *    With V3 of the interface, we have the user specify a callback
 *    ('output_fn', which is handed 'callback_data') to use when
 *    processing presorted input.  If the data is not presorted, the
 *    user can iterate over the contents of the unique object as much
 *    as they want.
 */
void skUniqueSetSortedInput(
    sk_unique_t                    *uniq,
    sk_unique_output_fn_t           output_fn,
    void                           *callback_data);


/*
 *  skUniqueSetTempDirectory(uniq, temp_dir);
 *
 *    Specifies that the unique object 'uniq' should use the directory
 *    'temp_dir' to store temporary files.  This function is a no-op
 *    if it is called after skUniquePrepareForInput() has been called.
 *    This function makes a copy of the parameter 'temp_dir', so the
 *    caller's string need not remain valid after the call.
 */
void skUniqueSetTempDirectory(
    sk_unique_t                    *uniq,
    const char                     *temp_dir);


/*
 *  ok = skUniqueSetFields(uniq, key_fields, distinct_fields,
 *                         agg_value_fields);
 *
 *    Specifies the fields that the unique object 'uniq' should use.
 *    It is an error to call this function after
 *    skUniquePrepareForInput() has been called.
 *
 *    'key_fields' is a field list containing the fields to bin by.
 *    It must contain at least one field.
 *
 *    'distinct_fields' is a field list containing the fields where
 *    the caller wishes to get a count of distinct values per key.  It
 *    may be NULL or contain no fields, subject to the additional
 *    constraints given below.  The fields specified in
 *    'distinct_fields' may not be present in 'key_fields'.
 *
 *    'agg_value_fields' is a field list containing the fields where
 *    the caller wishes to compute an aggregate value per key.  It may
 *    be NULL or contain no fields, subject to the additional
 *    constraints given below.
 *
 *    If this function is called multiple times, the previous field
 *    values will be lost.
 *
 *    These are additional constraints, which will be checked when
 *    skUniquePrepareForInput() is called:
 *
 *    The length of 'key_fields' and 'distinct_fields' must total to
 *    no more than HASHLIB_MAX_KEY_WIDTH bytes.
 *
 *    The length of 'agg_value_fields' must be no more than
 *    HASHLIB_MAX_VALUE_WIDTH bytes.
 *
 *    At least one 'agg_value_fields' field or one 'distinct_fields'
 *    field must be specified.
 *
 *    NOTE(review): the return-value convention is not stated here;
 *    presumably 0 on success and -1 on error -- confirm.
 */
int skUniqueSetFields(
    sk_unique_t                    *uniq,
    const sk_fieldlist_t           *key_fields,
    const sk_fieldlist_t           *distinct_fields,
    const sk_fieldlist_t           *agg_value_fields);


/*
 *  ok = skUniquePrepareForInput(uniq, err_fn);
 *
 *    Tells the unique object 'uniq' that initialization is complete,
 *    and that the caller is now ready to begin adding records to the
 *    unique object.
 *
 *    Returns an error if the object is not completely specified or if
 *    there are field errors as specified in skUniqueSetFields().
 *
 *    NOTE(review): 'err_fn' is a message callback of type
 *    sk_msg_fn_t; it is presumably used to report the reason for a
 *    failure, and whether it may be NULL is not stated here --
 *    confirm.  The success value is presumably 0 -- confirm.
 */
int skUniquePrepareForInput(
    sk_unique_t                    *uniq,
    sk_msg_fn_t                     err_fn);


/*
 *  ok = skUniquePrepareForOutput(uniq);
 *
 *    Tells the unique object 'uniq' that all records have been added,
 *    and the unique object can now do whatever it needs to do before
 *    the caller requests the binned records.
 *
 *    Returns 0 on success.  Returns -1 if skUniquePrepareForInput()
 *    has not yet been called.
 *
 *    This function may write data to a temporary file; it will return
 *    -1 if there is an error writing the file.
 */
int skUniquePrepareForOutput(
    sk_unique_t                    *uniq);


/*
 *  status = skUniqueAddRecord(uniq, rwrec);
 *
 *    Adds the SiLK Flow record 'rwrec' to the unique object 'uniq'.
 *    This function should only be called after a call to
 *    skUniquePrepareForInput().
 *
 *    This function will call skFieldListRecToBinary() for the key
 *    fields (and the distinct fields, if any).  This function will
 *    call skFieldListAddRecToBuffer() for the value fields, if any.
 *
 *    Returns 0 on success.  Returns -1 on error.  Error conditions
 *    include:
 *      - calling this function before calling
 *        skUniquePrepareForInput(),
 *      - being unable to write a temporary file, or
 *      - being unable to create a new hash table after writing the
 *        current hash table to disk.
 */
int skUniqueAddRecord(
    sk_unique_t                    *uniq,
    const rwRec                    *rwrec);


/*
 *  ok = skUniqueIteratorCreate(uniq, &iter);
 *
 *    Creates a new iterator that can be used to get the bins from the
 *    unique object, 'uniq'.  The iterator is stored in the location
 *    pointed to by 'iter'.
 *
 *    It is an error to call this function before calling
 *    skUniquePrepareForOutput().
 *
 *    Returns 0 on success, or -1 if there is an error allocating
 *    memory for the iterator.  This function may affect temporary
 *    files; it will return -1 if there is an error reading or writing
 *    the files.
 *
 *    Release the iterator with skUniqueIteratorDestroy().
 */
int skUniqueIteratorCreate(
    sk_unique_t                    *uniq,
    sk_unique_iterator_t          **iter);


/*
 *  void skUniqueIteratorDestroy(sk_unique_iterator_t **iter);
 *
 *    Destroys the iterator pointed to by 'iter' by invoking the
 *    iterator's own free_fn callback on 'iter'.  The callback is
 *    expected to set '*iter' to NULL; the macro itself does not.
 *    The macro is a no-op if iter or *iter is NULL.
 *
 *    The expansion is a single do/while(0) statement, so an
 *    invocation must be followed by a semicolon and is safe to use as
 *    the unbraced body of an if/else.  The argument is evaluated
 *    exactly once: it is copied into a local before being tested and
 *    dereferenced.
 */
#define skUniqueIteratorDestroy(uniqit_iter)                            \
    do {                                                                \
        sk_unique_iterator_t **uniqit_iter_ref_ = (uniqit_iter);        \
        if (uniqit_iter_ref_ && *uniqit_iter_ref_) {                    \
            (*uniqit_iter_ref_)->free_fn(uniqit_iter_ref_);             \
        }                                                               \
    } while (0)

/* signature of the 'free_fn' member of struct sk_unique_iterator_st;
 * the skUniqueIteratorDestroy() macro calls it with the address of
 * the caller's iterator pointer */
typedef void (*sk_uniqiter_free_fn_t)(
    sk_unique_iterator_t          **iter);


/*
 *  int skUniqueIteratorReset(sk_unique_iterator_t *iter);
 *
 *    Resets the iterator 'iter' so it can loop through the entries in
 *    the unique object another time.
 *
 *    Dispatches to the iterator's reset_fn callback and yields that
 *    callback's return value.  The argument appears twice in the
 *    expansion, so it must not have side effects.
 */
#define skUniqueIteratorReset(it)               \
    ((*(it)).reset_fn((it)))

/* signature of the 'reset_fn' member of struct sk_unique_iterator_st;
 * the skUniqueIteratorReset() macro calls it with the iterator */
typedef int (*sk_uniqiter_reset_fn_t)(
    sk_unique_iterator_t           *iter);


/*
 *  int skUniqueIteratorNext(
 *      sk_unique_iterator_t           *iter,
 *      uint8_t                       **key_fields_buffer,
 *      uint8_t                       **distinct_fields_buffer,
 *      uint8_t                       **agg_value_fields_buffer);
 *
 *    Sets the buffer pointers to buffers holding values associated
 *    with the current row.  The values are valid until the next call
 *    to this function.
 *
 *    Dispatches to the iterator's next_fn callback and yields that
 *    callback's return value.  The iterator argument appears twice in
 *    the expansion, so it must not have side effects; each buffer
 *    argument appears once.
 */
#define skUniqueIteratorNext(it, key_pp, dis_pp, val_pp)                \
    ((*(it)).next_fn((it), (key_pp), (dis_pp), (val_pp)))

/* signature of the 'next_fn' member of struct sk_unique_iterator_st;
 * the skUniqueIteratorNext() macro calls it with the iterator and the
 * three buffer-pointer addresses supplied by the caller */
typedef int (*sk_uniqiter_next_fn_t)(
    sk_unique_iterator_t           *iter,
    uint8_t                       **key_fields_buffer,
    uint8_t                       **distinct_fields_buffer,
    uint8_t                       **value_fields_buffer);


/* Definition of the iterator type that was forward-declared above as
 *     typedef struct sk_unique_iterator_st sk_unique_iterator_t;
 * The struct is a table of callbacks through which the
 * skUniqueIteratorReset(), skUniqueIteratorNext(), and
 * skUniqueIteratorDestroy() macros dispatch. */
struct sk_unique_iterator_st {
    /* called by skUniqueIteratorReset() */
    sk_uniqiter_reset_fn_t  reset_fn;
    /* called by skUniqueIteratorNext() */
    sk_uniqiter_next_fn_t   next_fn;
    /* called by skUniqueIteratorDestroy() */
    sk_uniqiter_free_fn_t   free_fn;
};




/* structure for binning records; used by the skPresortedUnique*()
 * functions declared below.  The struct body is not defined in this
 * header.  NOTE(review): presumably intended for input that is
 * already sorted by key, as the function names suggest -- confirm. */
typedef struct sk_sort_unique_st sk_sort_unique_t;


/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Presumably creates a new presorted-unique object, stores it in the
 * location referenced by 'ps_uniq', and returns 0 on success or -1 on
 * failure, mirroring skUniqueCreate(). */
int skPresortedUniqueCreate(
    sk_sort_unique_t  **ps_uniq);


/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Presumably destroys the object whose address is in 'ps_uniq',
 * mirroring skUniqueDestroy(). */
void skPresortedUniqueDestroy(
    sk_sort_unique_t  **ps_uniq);



/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Presumably tells 'ps_uniq' to store temporary files in 'temp_dir',
 * mirroring skUniqueSetTempDirectory(). */
void skPresortedUniqueSetTempDirectory(
    sk_sort_unique_t   *ps_uniq,
    const char         *temp_dir);


/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Takes the same three field lists as skUniqueSetFields() (key,
 * distinct, and aggregate-value fields); presumably the same
 * constraints and return convention apply. */
int skPresortedUniqueSetFields(
    sk_sort_unique_t               *ps_uniq,
    const sk_fieldlist_t           *key_fields,
    const sk_fieldlist_t           *distinct_fields,
    const sk_fieldlist_t           *agg_value_fields);


/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Presumably registers 'filename' as an input file to be read when
 * skPresortedUniqueProcess() runs.  Whether the string is copied and
 * the meaning of the return value are not visible here. */
int skPresortedUniqueAddInputFile(
    sk_sort_unique_t               *ps_uniq,
    const char                     *filename);


/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Registers 'stream_post_open', a callback that receives an
 * skstream_t pointer and returns int; presumably it is invoked on
 * each input stream after the stream has been opened. */
int skPresortedUniqueSetPostOpenFn(
    sk_sort_unique_t               *ps_uniq,
    int                           (*stream_post_open)(skstream_t *));


/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Registers 'stream_read', a callback that receives an skstream_t
 * pointer and an rwRec pointer and returns int; presumably it is used
 * to read each record from an input stream. */
int skPresortedUniqueSetReadFn(
    sk_sort_unique_t               *ps_uniq,
    int                           (*stream_read)(skstream_t *, rwRec *));


/* NOTE(review): this declaration had no documentation; the
 * description is inferred from the name and signature -- confirm.
 * Presumably reads the registered input files and calls 'output_fn'
 * with 'callback_data' for each row of results.  The meaning of the
 * return value is not visible here. */
int skPresortedUniqueProcess(
    sk_sort_unique_t               *ps_uniq,
    sk_unique_output_fn_t           output_fn,
    void                           *callback_data);


#endif  /* _RWSKUNIQUE_H */

/*
** Local Variables:
** mode:c
** indent-tabs-mode:nil
** c-basic-offset:4
** End:
*/
