# Source code for caar.history

from __future__ import absolute_import, division, print_function

import pickle
import random
from collections import namedtuple

import pandas as pd

from caar.cleanthermostat import _sort_meta_in_col_order

from future import standard_library
standard_library.install_aliases()


# Named-tuple layouts whose field names the create_*_df functions in this
# module pass to getattr() to read the index components off each record key.
Cycle = namedtuple('Cycle', 'device_id cycle_mode start_time')
Sensor = namedtuple('Sensor', 'sensor_id timestamp')
Geospatial = namedtuple('Geospatial', 'location_id timestamp')


def create_sensors_df(dict_or_pickle_file, sensor_ids=None):
    """Returns pandas DataFrame containing sensor ID, timestamps and
    sensor observations.

    Args:
        dict_or_pickle_file (dict or str): The object must have been created
            with dict_from_file() or pickle_from_file() function.

        sensor_ids (Optional[list or other iterable of ints or strings]):
            Sensor IDs. If no argument is specified, all IDs from the first
            arg will be in the DataFrame.

    Returns:
        sensors_df (pandas DataFrame): DataFrame has MultiIndex based on the
        ID(s) and timestamps.
    """
    multi_ids, vals, meta = _records_as_lists_of_tuples(dict_or_pickle_file,
                                                        list(Sensor._fields),
                                                        ids=sensor_ids)
    # The index levels are named after the headings stored in the column
    # metadata for the 'id' and 'time' columns.
    id_labels = [meta[col]['heading'] for col in ['id', 'time']]
    # Every remaining column in the metadata becomes a data column.
    data_labels = _data_labels_from_meta(meta, id_labels)
    sensors_df = _create_multi_index_df(id_labels, multi_ids, data_labels, vals)
    return sensors_df


def create_cycles_df(dict_or_pickle_file, device_ids=None):
    """Returns pandas DataFrame containing device IDs, cycle modes and cycle
    beginning timestamps as multi-part indexes, and the remaining columns
    listed in the metadata (e.g. cycle ending times) as values.

    Args:
        dict_or_pickle_file (dict or str): Must have been created with
            dict_from_file() or pickle_from_file() function.

        device_ids (Optional[list or other iterable of ints or strings]):
            Device IDs. If no argument is specified, all IDs from the first
            arg will be in the DataFrame.

    Returns:
        cycles_df (pandas DataFrame): DataFrame has MultiIndex based on the
        ID(s), cycle modes and start timestamps.
    """
    multi_ids, vals, meta = _records_as_lists_of_tuples(dict_or_pickle_file,
                                                        list(Cycle._fields),
                                                        ids=device_ids)
    # The index levels are named after the headings stored in the column
    # metadata for the 'id', 'cycle' and 'start_time' columns.
    id_labels = [meta[col]['heading'] for col in ['id', 'cycle', 'start_time']]
    # Every remaining column in the metadata becomes a data column.
    data_labels = _data_labels_from_meta(meta, id_labels)
    cycles_df = _create_multi_index_df(id_labels, multi_ids, data_labels, vals)
    return cycles_df


def create_geospatial_df(dict_or_pickle_file, location_ids=None):
    """Returns pandas DataFrame containing records with location IDs and time
    stamps as multi-part indexes, and the remaining columns listed in the
    metadata (e.g. outdoor temperatures) as values.

    Args:
        dict_or_pickle_file (dict or str): Must have been created with
            dict_from_file() or pickle_from_file() function.

        location_ids (Optional[list or other iterable of ints or strings]):
            Location IDs. If no argument is specified, all IDs from the
            first arg will be in the DataFrame.

    Returns:
        geospatial_df (pandas DataFrame): DataFrame has MultiIndex based on
        the ID(s) and timestamps.
    """
    multi_ids, vals, meta = _records_as_lists_of_tuples(dict_or_pickle_file,
                                                        list(Geospatial._fields),
                                                        ids=location_ids)
    # The index levels are named after the headings stored in the column
    # metadata for the 'id' and 'time' columns.
    id_labels = [meta[col]['heading'] for col in ['id', 'time']]
    # Every remaining column in the metadata becomes a data column.
    data_labels = _data_labels_from_meta(meta, id_labels)
    geospatial_df = _create_multi_index_df(id_labels, multi_ids, data_labels, vals)
    return geospatial_df


def _records_as_lists_of_tuples(dict_or_pickle_file, fields,
                                ids=None):
    """Returns tuple containing
    1) a list of named tuples containing sensor (or outdoor location) ids
    and timestamps and
    2) a list of either indoor (or outdoor) temperatures, or the ending time
    of a cycle, based on input of a pickle file containing a dict.
    """
    records = {}
    if isinstance(dict_or_pickle_file, dict):
        records = dict_or_pickle_file['records']
        meta = dict_or_pickle_file['cols_meta']
    else:
        try:
            with open(dict_or_pickle_file, 'rb') as cp:
                container = pickle.load(cp)
                records = container['records']
                meta = container['cols_meta']
        except ValueError:
            print('The first argument must be a pickle file or dict.')
    if ids is not None:
        for record_key in list(records.keys()):
            # Discard record if it is not among the desired ids.
            if getattr(record_key, fields[0]) not in ids:
                records.pop(record_key, None)
    multi_ids, vals = _multi_ids_and_data_vals(records, fields)
    return multi_ids, vals, meta


def _data_labels_from_meta(meta, id_labels):
    """Returns list of headings for the data (non-index) columns.

    The column keys of ``meta`` are taken in the order produced by
    ``_sort_meta_in_col_order`` (presumably column order; see
    caar.cleanthermostat). The first ``len(id_labels)`` keys are skipped and
    the 'heading' entries of the remaining ones are returned.
    """
    ordered_cols = list(_sort_meta_in_col_order(meta))
    n_index_cols = len(id_labels)
    data_labels = []
    for col in ordered_cols[n_index_cols:]:
        data_labels.append(meta[col]['heading'])
    return data_labels


def random_record(dict_or_pickle_file, value_only=False):
    """Returns a randomly chosen key-value pair from a dict or pickle file.

    Args:
        dict_or_pickle_file (dict or str): Either a dict with a 'records'
            key, or the path of a pickle file holding such a dict.

        value_only (Optional[bool]): If True, only the record's value is
            returned instead of the (key, value) pair.

    Returns:
        tuple or value: (key, value) of the chosen record, or just the value
        if value_only is True.

    Raises:
        ValueError: If opening or unpickling the file raises ValueError.
        KeyError: If there are no records to choose from.
    """
    if isinstance(dict_or_pickle_file, dict):
        records = dict_or_pickle_file['records']
    else:
        # NOTE(review): pickle.load() can execute arbitrary code while
        # loading; only pass files that come from a trusted source.
        try:
            with open(dict_or_pickle_file, 'rb') as cp:
                records = pickle.load(cp)['records']
        except ValueError:
            raise ValueError('The first argument must be a pickle file or '
                             'dict.')

    if not records:
        # Previously this case printed the message and then failed with
        # KeyError(None); raise the same exception type with the message.
        raise KeyError('No records in the dict or pickle file.')

    random_record_key = random.choice(list(records))
    if value_only:
        return records[random_record_key]
    return random_record_key, records[random_record_key]


def _random_record_key(keys):
    try:
        random_record_key = random.choice(keys)
    except IndexError:
        print('No records in the dict or pickle file.')
    else:
        return random_record_key


def _multi_ids_and_data_vals(records, fields):
    """Returns tuple containing
    1) a list of named tuples containing ids and timestamps (and cycle modes
    if applicable) and
    2) a list of either temperatures or cycle ending times, based on items
    (records) in a dict.
    """
    multi_ids = []
    vals = []
    for k, v in records.items():
        ids = tuple(getattr(k, f) for f in fields)
        multi_ids.append(ids)
        vals.append(v)
    return multi_ids, vals


def _create_multi_index_df(multiindex_names, multi_ids, column_names, values):
    """Returns MultiIndex pandas dataframe in which the index columns are for
    an id and timestamp and the value is for a temperature or a timestamp
    indicating the end of a cycle.
    """
    multiindex_columns = tuple(multiindex_names)
    multicols = pd.MultiIndex.from_tuples(multi_ids, names=multiindex_columns)
    df = pd.DataFrame(values, index=multicols, columns=column_names)
    df.sort_index(inplace=True, sort_remaining=True)
    return df