"""
Date and time related functions for RAVE analyses
"""

import pdb
import re 
from datetime import date, datetime, timedelta
from calendar import timegm
from time import gmtime

# %-style templates for SiLK date strings.  They are filled from a
# mapping with the keys year, mon, day, hour, min, sec and fsec, i.e.
# the dict produced by parse_any_datetime.
fmt_silk_datetime  = "%(year)s/%(mon)s/%(day)s:%(hour)s:%(min)s:%(sec)s%(fsec)s"
fmt_silk_date      = "%(year)s/%(mon)s/%(day)s"

# SiLK dates at various resolutions
# (keyed by the "resolution" names accepted by silk_datetime; each entry
# extends the plain date template with progressively finer time fields)
fmt_silk_date_res = {
    'day'    : fmt_silk_date,
    'hour'   : fmt_silk_date + ":%(hour)s",
    'minute' : fmt_silk_date + ":%(hour)s:%(min)s",
    'second' : fmt_silk_date + ":%(hour)s:%(min)s:%(sec)s",
    'msec'   : fmt_silk_date + ":%(hour)s:%(min)s:%(sec)s%(fsec)s"
}

# Month-first date: MM/DD/YYYY, optionally followed by a space, ':' or
# 'T' and a time HH[:MM[:SS[.fraction]]].  Leading and trailing
# whitespace is tolerated.  Groups that are absent from the input come
# back as None.
re_silk_datetime_1 = re.compile(r"""
          \s*
          (?P<mon>\d\d)
        / (?P<day>\d\d)
        / (?P<year>\d\d\d\d)
(?: [ :T] (?P<hour>\d\d)
    (?: : (?P<min>\d\d)
    (?: : (?P<sec>\d\d) (?P<fsec> \.\d+)? )? )? )?
          \s*
""", re.VERBOSE)

# Year-first date: YYYY/MM/DD, optionally followed by a space, ':' or
# 'T' and a time HH[:MM[:SS[.fraction]]].  Same group names as the
# month-first pattern; leading and trailing whitespace is tolerated.
re_silk_datetime_2 = re.compile(r"""
          \s*
          (?P<year>\d\d\d\d)
        / (?P<mon>\d\d)
        / (?P<day>\d\d)
(?: [ :T] (?P<hour>\d\d)
(?:     : (?P<min>\d\d)
(?:     : (?P<sec>\d\d) (?P<fsec> \.\d+)? )? )? )?
          \s*
""", re.VERBOSE)

# %-style templates for ISO-style output ("YYYY-MM-DD HH:MM:SS[.fff]" and
# "YYYY-MM-DD"), filled from the same field mapping as the SiLK templates.
fmt_iso_datetime = "%(year)s-%(mon)s-%(day)s %(hour)s:%(min)s:%(sec)s%(fsec)s"
fmt_iso_date     = "%(year)s-%(mon)s-%(day)s"

# ISO-style date: YYYY-MM-DD, optionally followed by a space or 'T' and a
# time HH[:MM[:SS[.fraction]]], then an optional zone designator ('Z' or
# +HH / -HH with optional :MM) captured as "tz".  The zone group sits
# outside the time group, so it may follow a bare date as well.
re_iso_datetime = re.compile(r"""
         \s*
         (?P<year>\d\d\d\d)
       - (?P<mon>\d\d)
       - (?P<day>\d\d)
(?: [ T] (?P<hour>\d\d)
(?:    : (?P<min>\d\d)
(?:    : (?P<sec>\d\d) (?P<fsec> \.\d+)? )? )? )?
(?:      (?P<tz> Z | [+-] \d\d (?: :\d\d )? ) )?
         \s*
""", re.VERBOSE)

# Patterns tried, in this order, by parse_any_datetime; the first one
# that matches at the start of the input is used.
datetime_regexps = [ re_silk_datetime_1,
                     re_silk_datetime_2,
                     re_iso_datetime ]

def parse_any_datetime(value):
    """Parse a date/time given in any accepted form into string fields.

    value may be:
      * an int/long, taken as seconds since the epoch (UTC);
      * anything float() accepts (a float, or a numeric string), also
        taken as seconds since the epoch (UTC);
      * a datetime.datetime or datetime.date;
      * a string matching one of the patterns in datetime_regexps.

    Returns a dict of strings with the keys year, mon, day, hour, min,
    sec and fsec.  Missing hour/min/sec default to '00'.  fsec is either
    '' or a '.' followed by exactly three digits (the fraction is padded
    or truncated to milliseconds).  When the ISO pattern matched, the
    dict also has a 'tz' key (None if absent); the zone is captured only
    and is not applied to the other fields.

    Raises TypeError if value cannot be turned into a string to parse,
    and ValueError if the string matches no pattern or has trailing text
    after the match.
    """
    # Note: If you change this, also consider changing datetime_obj.
    # datetime_obj inlines some of this to avoid using regex parsing
    # in the most common case.
    if isinstance(value, (int, long)):
        value = datetime.utcfromtimestamp(value)
    elif not isinstance(value, date):
        # Best effort: treat floats and numeric strings as epoch seconds.
        # Anything that is not numeric is left alone for the regex parse
        # below.  (datetime is a subclass of date, so both are skipped.)
        try:
            value = datetime.utcfromtimestamp(float(value))
        except (TypeError, ValueError, OverflowError, OSError):
            pass
    if isinstance(value, date):
        # str() of a date/datetime is ISO-like and matches re_iso_datetime.
        value = str(value)
    if not isinstance(value, basestring):
        raise TypeError("parse_any_datetime: can't handle type %s" %
                        str(type(value)))
    value = value.strip()
    for r in datetime_regexps:
        m = r.match(value)
        if m:
            if m.end() < len(value):
                raise ValueError("Could not parse entire string '%s' as date" %
                                 value)
            result = dict(m.groupdict())
            for k in ('hour', 'min', 'sec'):
                if result[k] is None:
                    result[k] = '00'
            if result['fsec'] is None:
                result['fsec'] = ''
            else:
                # Normalise ".d+" to exactly ".ddd" (millisecond precision).
                result['fsec'] = (result['fsec'] + '000')[:4]
            return result
    raise ValueError("invalid literal for date: %s" % value)

# Convert date/time in one of our accepted formats to ISO format
def iso_datetime(t):
    """Return t as an ISO-style "YYYY-MM-DD HH:MM:SS[.fff]" string.

    t may be anything parse_any_datetime accepts.
    """
    fields = parse_any_datetime(t)
    return fmt_iso_datetime % fields

def iso_date(t):
    """Return the date part of t as an ISO-style "YYYY-MM-DD" string.

    t may be anything parse_any_datetime accepts.
    """
    fields = parse_any_datetime(t)
    return fmt_iso_date % fields

def silk_datetime(t, resolution="msec"):
    """Get SiLK-friendly string representation of date.

    t: anything parse_any_datetime accepts.
    resolution: how fine-grained the output is; one of the keys of
    fmt_silk_date_res ("day", "hour", "minute", "second", "msec").
    E.g. "2006/01/01:01" for "hour" versus "2006/01/01:01:00:00" for
    "second".  An unrecognised resolution is treated as "msec".
    """
    datefmt = fmt_silk_date_res.get(resolution, fmt_silk_date_res["msec"])
    return datefmt % parse_any_datetime(t)

def silk_day(t):
    """Format t as a SiLK date at "day" resolution.

    Same as silk_datetime(t, "day").
    """
    return silk_datetime(t, "day")

def silk_hour(t):
    """Format t as a SiLK date at "hour" resolution.

    Same as silk_datetime(t, "hour").
    """
    return silk_datetime(t, "hour")

def silk_minute(t):
    """Format t as a SiLK date at "minute" resolution.

    Same as silk_datetime(t, "minute").
    """
    return silk_datetime(t, "minute")

def silk_second(t):
    """Format t as a SiLK date at "second" resolution.

    Same as silk_datetime(t, "second").  This equals silk_datetime(t)
    only when t has no fractional seconds, because silk_datetime
    defaults to "msec" resolution and then appends the fraction.
    """
    return silk_datetime(t, "second")

def silk_date(t):
    """Return the date part of t as a SiLK "YYYY/MM/DD" string.

    t may be anything parse_any_datetime accepts.
    """
    fields = parse_any_datetime(t)
    return fmt_silk_date % fields

def silk_datetime_range(t1, t2):
    """Return "<start>-<end>" with both ends formatted by silk_datetime."""
    start = silk_datetime(t1)
    end = silk_datetime(t2)
    return '-'.join((start, end))

def silk_date_range(t1, t2):
    """Return "<start>-<end>" with both ends formatted by silk_date."""
    start = silk_date(t1)
    end = silk_date(t2)
    return '-'.join((start, end))

def datetime_obj(t):
    """Coerce t to a datetime.datetime.

    Accepted inputs, checked in this order:
      * datetime.datetime: returned unchanged;
      * datetime.date: midnight of that day;
      * int/long: seconds since the epoch, interpreted as UTC;
      * anything int() accepts (e.g. a float or a digit string): converted
        with int() first, so a float loses its fractional seconds;
      * anything else parse_any_datetime accepts; fractional seconds are
        kept at the millisecond precision that function provides.

    Raises whatever parse_any_datetime raises (TypeError / ValueError)
    when t is in none of the accepted forms.
    """
    # Note: The below inlines some code from parse_any_datetime,
    # to efficiently handle the most common cases without doing
    # any regex-based parsing.
    if isinstance(t, datetime):
        return t
    if isinstance(t, date):
        return datetime(t.year, t.month, t.day, 0, 0, 0)
    if isinstance(t, (int, long)):
        return datetime.utcfromtimestamp(t)
    try:
        return datetime.utcfromtimestamp(int(t))
    except (TypeError, ValueError, OverflowError, OSError):
        # Not usable as whole epoch seconds; fall through to full parsing.
        pass
    d = parse_any_datetime(t)
    # fsec is '' or '.ddd'; an empty fraction means zero microseconds.
    fsec = d['fsec'] or '.0'
    return datetime(int(d['year']), int(d['mon']), int(d['day']),
                    int(d['hour']), int(d['min']), int(d['sec']),
                    int(float(fsec) * 1000000))

def bin_datetime(delta_secs, t, ceil=False):
    """Return the boundary of the delta_secs-wide bin that t falls in.

    delta_secs: bin width in seconds.
    t: anything datetime_obj accepts.
    ceil: if false (the default) return the start of t's bin; if true
    return the next bin boundary, unless t already lies exactly on one.

    The result is expressed in seconds since the epoch and is always a
    multiple of delta_secs.  Only whole seconds of t are considered,
    because the time tuple used for the conversion carries no
    microseconds.
    """
    secs = timegm(datetime_obj(t).timetuple())
    # Floor division, explicitly: plain "/" only bins correctly when both
    # operands are ints under Python 2 division rules.
    bin_index, remainder = divmod(secs, delta_secs)
    if ceil and remainder != 0:
        bin_index += 1
    return bin_index * delta_secs


def bin_by(delta, times, *value_cols):
    """
    Sum data into bins of width delta.

    delta: bin width, as a datetime.timedelta or an int number of seconds.
    times: sorted sequence of datetime.datetime.
    value_cols: zero or more sequences of numbers, each parallel to times.

    Returns a tuple whose first item is the list of bin start times (one
    datetime per non-empty run of samples sharing a bin) and whose
    remaining items are, for each value column, the list of per-bin sums
    (as floats).
    """
    if isinstance(delta, int):
        delta = timedelta(seconds=delta)
    delta_secs = delta.seconds + 86400 * delta.days
    n_cols = len(value_cols)
    result_times = []
    result_values = tuple([] for _ in value_cols)
    total = len(times)
    pos = 0
    while pos < total:
        # Start a new bin at the current sample and absorb every
        # following sample that maps to the same bin.
        bin_start = bin_datetime(delta_secs, times[pos])
        sums = [0.0] * n_cols
        while pos < total and \
                  bin_datetime(delta_secs, times[pos]) == bin_start:
            for col_idx, col in enumerate(value_cols):
                sums[col_idx] += col[pos]
            pos += 1
        result_times.append(datetime.utcfromtimestamp(bin_start))
        for out_col, bin_sum in zip(result_values, sums):
            out_col.append(bin_sum)
    return (result_times,) + result_values

def bin_labeled_by(delta, labels, times, *value_cols):
    """
    Bin data separately for each distinct label.

    delta: bin width, passed through to bin_by.
    labels: sequence of hashable labels, parallel to times.
    times: sequence of datetime.datetime (need not be sorted; each
    label's samples are sorted by time before binning).
    value_cols: zero or more sequences of numbers, parallel to times.

    Returns a tuple (labels, times, col_1, ..., col_n) of parallel lists
    holding one row per (label, bin): the label, the bin start time and
    the per-bin sum of each value column.  Rows of the same label are
    contiguous; the order of the labels follows set iteration order.
    """
    label_names = set(labels)
    label_times = dict((name, []) for name in label_names)
    label_values = dict((name, tuple([] for _ in value_cols))
                        for name in label_names)
    # Split the input rows by label.
    for row, name in enumerate(labels):
        label_times[name].append(times[row])
        for col, bucket in zip(value_cols, label_values[name]):
            bucket.append(col[row])
    # Sort each label's rows by time, then bin them.
    for name in label_names:
        stamps = label_times[name]
        columns = label_values[name]
        order = sorted(range(len(stamps)), key=stamps.__getitem__)
        sorted_stamps = [stamps[k] for k in order]
        sorted_columns = tuple([column[k] for k in order]
                               for column in columns)
        binned = bin_by(delta, sorted_stamps, *sorted_columns)
        label_times[name] = binned[0]
        label_values[name] = binned[1:]
    # Flatten the per-label results back into parallel columns.
    out_labels = []
    out_times = []
    out_values = tuple([] for _ in value_cols)
    for name in label_names:
        out_labels.extend([name] * len(label_times[name]))
        out_times.extend(label_times[name])
        for bucket, column in zip(out_values, label_values[name]):
            bucket.extend(column)
    return (out_labels, out_times) + out_values

def moving_average(delta, times, values):
    """
    Compute the moving average and standard deviation of values over
    times, in periods of size delta.

    delta:  datetime.timedelta giving the window length.
    times:  sorted sequence of datetime.datetime.
    values: sequence of numbers, parallel to times.

    Returns a tuple (result_times, result_values, result_stddevs) of
    three parallel lists with one entry per emitted window: the mean
    timestamp of the samples in the window, their mean value, and their
    sample standard deviation (0.0 for a single-sample window).

    A window starts at each sample in turn and holds the following
    samples that lie less than delta after it.  A window is emitted only
    once a later sample at least delta after its first sample has been
    seen, so windows that run into the end of the data are not emitted.
    Empty input yields three empty lists.
    """
    # Algorithm:
    # 1. Fill the window until it contains at least delta worth of data.
    #    (If impossible, stop.)
    # 2. Emit the moving average for the window.
    # 3. Remove one value from the window.
    # 4. Goto 1
    result_times = []
    result_values = []
    result_stddevs = []
    len_times = len(times)
    if len_times == 0:
        # Nothing to average (and times[0] below would not exist).
        return (result_times, result_values, result_stddevs)
    i = 0
    init_time = times[0]
    window_times = []
    window_values = []
    val_sum = 0.0
    val_sq_sum = 0.0
    val_count = 0
    # Timestamps are accumulated as offsets from init_time so that their
    # mean can be computed with timedelta arithmetic.
    time_sum = timedelta(0)
    try:
        while True:
            while len(window_times) < 1 or \
                  window_times[-1] - window_times[0] < delta:
                if i >= len_times:
                    # Ran out of data before the window was complete:
                    # the trailing partial window is not emitted.
                    return (result_times, result_values, result_stddevs)
                if len(window_times) >= 1 and \
                   times[i] - window_times[0] >= delta:
                    # Next sample belongs to a later window.
                    break
                window_times.append(times[i])
                time_sum = time_sum + (times[i] - init_time)
                window_values.append(values[i])
                val_sum = val_sum + values[i]
                val_sq_sum = val_sq_sum + values[i]**2
                val_count = val_count + 1
                i = i + 1
            # Emit value
            result_times.append(init_time + time_sum / val_count)
            result_values.append(val_sum / val_count)
            if val_count > 1:
                try:
                    variance = ((val_sq_sum - (val_sum**2)/val_count)
                                / (val_count-1))
                    # Rounding can leave the variance marginally below
                    # zero; clamp so the root is always a real number.
                    stddev = max(variance, 0.0) ** 0.5
                except (ArithmeticError, ValueError):
                    stddev = 0.0
            else:
                stddev = 0.0
            result_stddevs.append(stddev)
            # Drop the oldest sample and back its contribution out of
            # the running sums.
            old_time = window_times.pop(0)
            time_sum = time_sum - (old_time - init_time)
            old_value = window_values.pop(0)
            val_sum = val_sum - old_value
            val_sq_sum = val_sq_sum - old_value**2
            val_count = val_count - 1
            if (len(window_times) < 2 or \
                window_times[-1] - window_times[0] <= delta) and \
                i >= len_times:
                break
    except IndexError:
        # values is shorter than times: stop and return what was
        # emitted so far.
        pass
    return (result_times, result_values, result_stddevs)

### Duration handling

# ISO 8601-style duration: "P", then optional <n>Y, <n>M, <n>D parts,
# then optionally "T" followed by optional <n>H, <n>M and <n>[.frac]S
# parts.  Every component is optional, so a bare "P" also matches.
# Note that the "secs" group encloses the "fsecs" group: for "PT1.5S"
# secs is "1.5" and fsecs is ".5".
re_iso_duration = re.compile(r"""
    \s*
    P
    (?: (?P<years> \d+) Y)?
    (?: (?P<mons> \d+) M)?
    (?: (?P<days> \d+) D)?
(?: T
    (?: (?P<hours> \d+) H)?
    (?: (?P<mins> \d+) M)?
    (?: (?P<secs> \d+ (?P<fsecs> \.\d+)? ) S)? )?
    \s*
""", re.VERBOSE)

# We don't actually handle fractional seconds here

def check_duration(dur):
    """Validate that dur is a duration string matching re_iso_duration.

    Returns None when the whole of dur matches the pattern.  Raises
    ValueError, with the repr of dur in the message, when it does not
    match or when text is left over after the match.
    """
    m = re_iso_duration.match(dur)
    if m is None or m.end() < len(dur):
        raise ValueError("Invalid value for duration: %r" % (dur,))

def _shift_by_duration(t, dur, sign):
    """Shared implementation of add_duration (sign=+1) and
    sub_duration (sign=-1).

    Years and months are applied first, as calendar arithmetic.  If the
    original day of month does not exist in the resulting month, the
    date is clamped to the last day of that month.  Days, hours, minutes
    and seconds are then applied as plain offsets.
    """
    m = re_iso_duration.match(dur)
    if m is None or m.end() < len(dur):
        raise ValueError("Invalid value for duration: %r" % (dur,))
    d = m.groupdict('0')
    years, mons, days, hours, mins = [
        sign * int(d[k]) for k in ('years', 'mons', 'days', 'hours', 'mins')]
    # The "secs" group text includes any fractional part (e.g. "1.5").
    # Fractional seconds are not supported here, so only the whole
    # seconds are used instead of failing in int().
    secs = sign * int(d['secs'].partition('.')[0])
    (year, mon, day, hour, minute, sec) = datetime_obj(t).timetuple()[:6]
    mon = mon - 1   # work with zero-based months for divmod
    # Apply months and years
    (carry, new_mon) = divmod(mon + mons, 12)
    new_year = year + years + carry
    # Normalize it a bit: timegm accepts an out-of-range day (e.g.
    # Feb 31) and gmtime turns it back into a real calendar date.
    (day2, hour2, min2, sec2) = \
        gmtime(timegm((new_year, new_mon+1, day, hour, minute, sec)))[2:6]
    # Is there month overflow?  If so, readjust to the last day of the
    # month.  The adjustment is to start with the following month
    # and 0 for day (the last day of the previous month).
    if day2 < day:
        day2 = 0
        (carry, new_mon) = divmod(new_mon + 1, 12)
        new_year = new_year + carry
    # Now apply the days, hours, minutes, and seconds; timegm copes with
    # fields that end up outside their usual ranges.
    t2 = timegm((new_year, new_mon+1, day2+days,
                 hour2+hours, min2+mins, sec2+secs))
    return datetime.utcfromtimestamp(t2)

def add_duration(t, dur):
    """Return the datetime obtained by adding duration dur to t.

    t: anything datetime_obj accepts.
    dur: duration string matching re_iso_duration, e.g. "P1M" or
    "P1DT12H".  Fractional seconds in dur are ignored.

    Month and year arithmetic clamps to the last day of the target
    month (e.g. Jan 31 + P1M gives the last day of February).  Any
    sub-second part of t is dropped.  Raises ValueError if dur is not
    a valid duration.
    """
    return _shift_by_duration(t, dur, 1)

def sub_duration(t, dur):
    """Return the datetime obtained by subtracting duration dur from t.

    t: anything datetime_obj accepts.
    dur: duration string matching re_iso_duration, e.g. "P1M" or
    "P1DT12H".  Fractional seconds in dur are ignored.

    Month and year arithmetic clamps to the last day of the target
    month (e.g. Mar 31 - P1M gives the last day of February).  Any
    sub-second part of t is dropped.  Raises ValueError if dur is not
    a valid duration.
    """
    return _shift_by_duration(t, dur, -1)

# Names exported by "from <this module> import *".  The per-resolution
# SiLK shortcuts are exported as a complete set.
__all__ = [ 'iso_datetime', 'iso_date', 'silk_datetime', 'silk_date',
            'silk_datetime_range', 'silk_date_range', 'datetime_obj',
            'moving_average', 'bin_by', 'bin_labeled_by', 'bin_datetime',
            'add_duration', 'sub_duration', 'silk_day', 'silk_hour',
            'silk_minute', 'silk_second', 'check_duration' ]
