# G. Mias Lab »
# Source code for pyiomica.dataStorage

'''Data storage functions'''


import h5py
import json
import pickle

from .globalVariables import *

from .extendedDataFrame import DataFrame
from . import utilityFunctions


def read(fileName, withPKLZextension = True, hdf5fileName = None, jsonFormat = False):

    """Read object from a file recorded by function "write". Pandas and Numpy objects are
    read from HDF5 file when provided, otherwise attempt to read from PKLZ file.

    Parameters:
        fileName: str
            Path of directories ending with the file name

        withPKLZextension: boolean, Default True
            Add ".pklz" to a pickle file

        hdf5fileName: str, Default None
            Path of directories ending with the file name. 
            If None then data is read from a pickle file.
            If the object is not found in the HDF5 file, the pickle file is tried next

        jsonFormat: boolean, Default False
            Read data from a compressed (gzip) json file. When True, all other
            options are ignored and "fileName" is used as is
    
    Returns:
        data 
            Data object read from the file, or None if the file is not found
            (a message is printed in that case)

    Usage:
        exampleDataFrame = read('/dir1/exampleDataFrame', hdf5fileName='/dir2/data.h5')
    """

    if jsonFormat:
        # Reading must not have filesystem side effects, so no directories
        # are created here; a missing path simply raises from GzipFile.
        with gzip.GzipFile(fileName, 'r') as tempFile:
            return json.loads(tempFile.read().decode('utf-8'))

    if hdf5fileName is not None:
        if not os.path.isfile(hdf5fileName):
            print(hdf5fileName, 'not found.')
            return None

        baseName = os.path.basename(fileName)
        arrayKey = 'arrays/' + baseName
        storedAsDataFrame = False

        # The context manager guarantees the HDF5 handle is released on every path
        with h5py.File(hdf5fileName, 'r') as hdf5file:
            if baseName in hdf5file and hdf5file[baseName].attrs.get('gtype') == 'pd':
                storedAsDataFrame = True
            elif arrayKey in hdf5file and hdf5file[arrayKey].attrs.get('gtype') == 'np':
                # "[()]" loads the whole dataset into memory; it replaces
                # the "Dataset.value" property that h5py 3.0 removed
                return hdf5file[arrayKey][()]

        if storedAsDataFrame:
            # Read with pandas only after the h5py handle has been closed
            return DataFrame(pd.read_hdf(hdf5fileName, key=baseName, mode='r'))

        # Not found in the HDF5 file: report and fall back to the pickle file
        print(baseName, 'not found in', hdf5fileName)

    pickleFileName = fileName + ('.pklz' if withPKLZextension else '')

    if not os.path.isfile(pickleFileName):
        print(pickleFileName, 'not found.')
        return None

    # NOTE: unpickling can execute arbitrary code; only read files from trusted sources
    with gzip.open(pickleFileName, 'rb') as tempFile:
        return pickle.load(tempFile)

def write(data, fileName, withPKLZextension = True, hdf5fileName = None, jsonFormat = False):

    """Write object into a file. Pandas and Numpy objects are recorded in HDF5 format
    when 'hdf5fileName' is provided otherwise pickled into a new file.

    Parameters:
        data: any type
            Data object to write into a file

        fileName: str
            Path of directories ending with the file name. When writing to HDF5
            only the file name part is used, as the key inside the HDF5 file

        withPKLZextension: boolean, Default True
            Add ".pklz" to a pickle file

        hdf5fileName: str, Default None
            Path of directories ending with the file name. If None then data is pickled.
            Data types other than pandas DataFrame and numpy ndarray are pickled as well

        jsonFormat: boolean, Default False
            Save data into compressed (gzip) json file. When True, all other
            options are ignored and "fileName" is used as is

    Returns:
        None

    Usage:
        write(exampleDataFrame, '/dir1/exampleDataFrame', hdf5fileName='/dir2/data.h5')
    """

    if jsonFormat:
        utilityFunctions.createDirectories("/".join(fileName.split("/")[:-1]))

        with gzip.GzipFile(fileName, 'w') as tempFile:
            tempFile.write(json.dumps(data).encode('utf-8'))

        return None

    # Exact type checks are kept on purpose: other subclasses keep going to pickle
    if hdf5fileName is not None and type(data) in (pd.DataFrame, DataFrame):
        utilityFunctions.createDirectories("/".join(hdf5fileName.split("/")[:-1]))
        key = os.path.basename(fileName)

        pd.DataFrame(data).to_hdf(hdf5fileName, key=key, mode='a', complevel=6, complib='zlib')

        # Tag the group so that "read" knows to load it with pandas;
        # the context manager closes the HDF5 handle afterwards
        with h5py.File(hdf5fileName, 'a') as hdf5file:
            hdf5file[key].attrs['gtype'] = 'pd'
    elif hdf5fileName is not None and type(data) is np.ndarray:
        # Create the parent directory of the HDF5 file, not a directory
        # named after the HDF5 file itself
        utilityFunctions.createDirectories("/".join(hdf5fileName.split("/")[:-1]))

        # Same key derivation as in "read", so stored arrays can be found again
        key = 'arrays/' + os.path.basename(fileName)

        # Arrays are always stored as float
        data = data.astype(float)

        with h5py.File(hdf5fileName, 'a') as hdf5file:
            if key not in hdf5file:
                # Unlimited maxshape along every axis allows later resizing
                hdf5file.create_dataset(key, data=data, maxshape=(None,) * data.ndim, dtype=data.dtype,
                                        compression='gzip', compression_opts=6)
            else:
                dataset = hdf5file[key]
                if dataset.shape != data.shape:
                    dataset.resize(data.shape)
                dataset[...] = data

            # Tag the dataset so that "read" knows to load it as a numpy array
            hdf5file[key].attrs['gtype'] = 'np'
    else:
        if hdf5fileName is not None:
            print('HDF5 format is not supported for data type:', type(data))
            print('Recording data to a pickle file.')

        utilityFunctions.createDirectories("/".join(fileName.split("/")[:-1]))

        with gzip.open(fileName + ('.pklz' if withPKLZextension else ''), 'wb') as tempFile:
            pickle.dump(data, tempFile, protocol=4)

    return None