Source code for caar.history

from __future__ import absolute_import, division, print_function

import pickle
import random
from collections import namedtuple

import pandas as pd

from caar.cleanthermostat import _sort_meta_in_col_order

from future import standard_library
standard_library.install_aliases()


# Record-key layouts. The create_*_df functions below pass each type's
# ``_fields`` to the record loader, which reads those attributes from every
# record key to build the DataFrame index.

# Key of a cycle record: device, cycle mode and cycle start time.
Cycle = namedtuple(
    'Cycle',
    ('device_id', 'cycle_mode', 'start_time'),
)

# Key of a sensor record: sensor and observation time.
Sensor = namedtuple(
    'Sensor',
    ('sensor_id', 'timestamp'),
)

# Key of a geospatial record: location and observation time.
Geospatial = namedtuple(
    'Geospatial',
    ('location_id', 'timestamp'),
)


def create_sensors_df(dict_or_pickle_file, sensor_ids=None):
    """Build a pandas DataFrame of sensor observations.

    The frame is indexed by sensor ID and timestamp; the remaining columns
    hold the sensor observations.

    Args:
        dict_or_pickle_file (dict or str): The object must have been created
            with the dict_from_file() or pickle_from_file() function.

        sensor_ids (Optional[list or other iterable of ints or strings]):
            Sensor IDs to keep. If no argument is specified, all IDs from
            the first arg will be in the DataFrame.

    Returns:
        sensors_df (pandas DataFrame): DataFrame with a MultiIndex based on
        the ID(s) and timestamps.
    """
    index_tuples, observations, cols_meta = _records_as_lists_of_tuples(
        dict_or_pickle_file, list(Sensor._fields), ids=sensor_ids)

    # Headings of the index levels come from the 'id' and 'time' metadata.
    index_headings = []
    for meta_key in ('id', 'time'):
        index_headings.append(cols_meta[meta_key]['heading'])

    value_headings = _data_labels_from_meta(cols_meta, index_headings)
    return _create_multi_index_df(index_headings, index_tuples,
                                  value_headings, observations)


def create_cycles_df(dict_or_pickle_file, device_ids=None):
    """Build a pandas DataFrame of cycle records.

    Device IDs, cycle modes and cycle beginning timestamps form the
    multi-part index; the cycle ending times (and any further data columns
    listed in the metadata) are the values.

    Args:
        dict_or_pickle_file (dict or str): Must have been created with the
            dict_from_file() or pickle_from_file() function.

        device_ids (Optional[list or other iterable of ints or strings]):
            Device IDs to keep. If no argument is specified, all IDs from
            the first arg will be in the DataFrame.

    Returns:
        cycles_df (pandas DataFrame): DataFrame with a MultiIndex based on
        the ID(s) and timestamps.
    """
    index_tuples, cycle_values, cols_meta = _records_as_lists_of_tuples(
        dict_or_pickle_file, list(Cycle._fields), ids=device_ids)

    # Headings of the index levels come from the 'id', 'cycle' and
    # 'start_time' metadata entries, in that order.
    index_headings = []
    for meta_key in ('id', 'cycle', 'start_time'):
        index_headings.append(cols_meta[meta_key]['heading'])

    value_headings = _data_labels_from_meta(cols_meta, index_headings)
    return _create_multi_index_df(index_headings, index_tuples,
                                  value_headings, cycle_values)


def create_geospatial_df(dict_or_pickle_file, location_ids=None):
    """Build a pandas DataFrame of geospatial records.

    Location IDs and timestamps form the multi-part index; outdoor
    temperatures (and any further data columns listed in the metadata) are
    the values.

    Args:
        dict_or_pickle_file (dict or str): Must have been created with the
            dict_from_file() or pickle_from_file() function.

        location_ids (Optional[list or other iterable of ints or strings]):
            Location IDs to keep. If no argument is specified, all IDs from
            the first arg will be in the DataFrame.

    Returns:
        geospatial_df (pandas DataFrame): DataFrame with a MultiIndex based
        on the ID(s) and timestamps.
    """
    index_tuples, readings, cols_meta = _records_as_lists_of_tuples(
        dict_or_pickle_file, list(Geospatial._fields), ids=location_ids)

    # Headings of the index levels come from the 'id' and 'time' metadata.
    index_headings = []
    for meta_key in ('id', 'time'):
        index_headings.append(cols_meta[meta_key]['heading'])

    value_headings = _data_labels_from_meta(cols_meta, index_headings)
    return _create_multi_index_df(index_headings, index_tuples,
                                  value_headings, readings)


def _records_as_lists_of_tuples(dict_or_pickle_file, fields, ids=None):
    """Return index tuples, data values and column metadata for the records.

    Args:
        dict_or_pickle_file (dict or str): Either a dict with 'records' and
            'cols_meta' entries, or the path of a pickle file that contains
            such a dict.
        fields (list of str): Attribute names read from each record key.
            The first name is the ID attribute used for filtering.
        ids (Optional[iterable]): If given, only records whose ID attribute
            is in ``ids`` are returned.

    Returns:
        tuple: ``(multi_ids, vals, meta)`` where ``multi_ids`` is a list of
        tuples built from the ``fields`` attributes of each record key,
        ``vals`` is the list of the matching record values, and ``meta`` is
        the 'cols_meta' entry of the input.

    Raises:
        ValueError: Re-raised (after printing a hint) if opening or
            unpickling the file raises it. Other exceptions from ``open``
            or ``pickle.load`` propagate unchanged.
        KeyError: If the container lacks 'records' or 'cols_meta'.
    """
    if isinstance(dict_or_pickle_file, dict):
        container = dict_or_pickle_file
    else:
        try:
            # NOTE(security): pickle.load() can execute arbitrary code.
            # Only pass files that come from a trusted source.
            with open(dict_or_pickle_file, 'rb') as cp:
                container = pickle.load(cp)
        except ValueError:
            print('The first argument must be a pickle file or dict.')
            # Surface the real failure here; continuing would only fail
            # later because no metadata was ever loaded.
            raise

    records = container['records']
    meta = container['cols_meta']

    if ids is not None:
        id_field = fields[0]
        # Build a filtered copy rather than popping from ``records``: the
        # dict may belong to the caller and must not lose entries.
        records = {record_key: value
                   for record_key, value in records.items()
                   if getattr(record_key, id_field) in ids}

    multi_ids, vals = _multi_ids_and_data_vals(records, fields)
    return multi_ids, vals, meta


def _data_labels_from_meta(meta, id_labels):
    """Return the headings of the data (non-index) columns.

    The metadata keys are put in the order given by
    _sort_meta_in_col_order(); the first ``len(id_labels)`` of them are
    skipped (presumably the index columns -- this relies on that helper
    placing them first) and the 'heading' of each remaining one is
    collected.

    Args:
        meta (dict): Column metadata; each value has a 'heading' entry.
        id_labels (list): Labels of the index columns. Only its length is
            used.

    Returns:
        list: Headings of the remaining columns.
    """
    ordered_cols = list(_sort_meta_in_col_order(meta))
    data_cols = ordered_cols[len(id_labels):]
    return [meta[col]['heading'] for col in data_cols]


def random_record(dict_or_pickle_file, value_only=False):
    """Return a randomly chosen record from a dict or pickle file.

    Args:
        dict_or_pickle_file (dict or str): A dict with a 'records' entry,
            or the path of a pickle file containing such a dict.
        value_only (bool): If True, return only the record's value instead
            of the (key, value) pair.

    Returns:
        The value of the chosen record if ``value_only`` is true, otherwise
        a ``(key, value)`` tuple.
    """
    if isinstance(dict_or_pickle_file, dict):
        records = dict_or_pickle_file['records']
    else:
        # Stays empty if loading fails with a ValueError.
        records = {}
        try:
            with open(dict_or_pickle_file, 'rb') as pickled:
                records = pickle.load(pickled)['records']
        except ValueError:
            print('The first argument must be a pickle file or dict.')

    chosen_key = _random_record_key(list(records.keys()))
    chosen_value = records[chosen_key]
    if value_only:
        return chosen_value
    return chosen_key, chosen_value


def _random_record_key(keys): try: random_record_key = random.choice(keys) except IndexError: print('No records in the dict or pickle file.') else: return random_record_key def _multi_ids_and_data_vals(records, fields): """Returns tuple containing 1) a list of named tuples containing ids and timestamps (and cycle modes if applicable) and 2) a list of either temperatures or cycle ending times, based on items (records) in a dict. """ multi_ids = [] vals = [] for k, v in records.items(): ids = tuple(getattr(k, f) for f in fields) multi_ids.append(ids) vals.append(v) return multi_ids, vals def _create_multi_index_df(multiindex_names, multi_ids, column_names, values): """Returns MultiIndex pandas dataframe in which the index columns are for an id and timestamp and the value is for a temperature or a timestamp indicating the end of a cycle. """ multiindex_columns = tuple(multiindex_names) multicols = pd.MultiIndex.from_tuples(multi_ids, names=multiindex_columns) df = pd.DataFrame(values, index=multicols, columns=column_names) df.sort_index(inplace=True, sort_remaining=True) return df