# Source code for sas.dataloader.readers.ascii_reader

"""
    ASCII reader
"""
############################################################################
#This software was developed by the University of Tennessee as part of the
#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
#project funded by the US National Science Foundation. 
#If you use DANSE applications to do scientific research that leads to
#publication, we ask that you acknowledge the use of the software with the
#following sentence:
#This work benefited from DANSE software developed under NSF award DMR-0520547.
#copyright 2008, University of Tennessee
#############################################################################


import numpy
import os

from sas.dataloader.data_info import Data1D

# Check whether we have a unit converter available; the reader degrades
# gracefully (no unit conversion) when the optional package is missing.
has_converter = True
try:
    from sas.data_util.nxsunit import Converter
except ImportError:
    has_converter = False

# Substitute for exact zeros in dx/dy so downstream code never divides by 0.
_ZERO = 1e-16


class Reader:
    """
    Class to load ascii files (2, 3 or 4 columns).
    """
    ## File type
    type_name = "ASCII"
    ## Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    ## List of allowed extensions
    # NOTE: os.path.splitext keeps the leading dot, so every entry needs it;
    # the previous 'csv'/'CSV' entries could never match and are fixed here.
    ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', '.csv', '.CSV']
    ## Flag to bypass extension check
    allow_all = True

    def read(self, path):
        """
        Load data file

        :param path: file path
        :return: Data1D object, or None when the extension check rejects
            the file
        :raise RuntimeError: when the file can't be opened, is not a file,
            is empty, or the data vectors have inconsistent lengths
        """
        if not os.path.isfile(path):
            raise RuntimeError("%s is not a file" % path)
        basename = os.path.basename(path)
        _, extension = os.path.splitext(basename)
        if not (self.allow_all or extension.lower() in self.ext):
            # Extension not recognized and strict checking requested.
            return None
        try:
            # Read in binary mode since GRASP frequently contains
            # non-ascii characters that break a text-mode open.
            input_f = open(path, 'rb')
        except IOError:
            raise RuntimeError("ascii_reader: cannot open %s" % path)
        try:
            buff = input_f.read()
        finally:
            # Close in all cases so a parse error can't leak the handle.
            input_f.close()
        lines = buff.split('\n')
        # Some ascii data uses \r as the line separator; try it when
        # everything landed on one long "line".
        if len(lines) < 2:
            lines = buff.split('\r')

        x = numpy.zeros(0)
        y = numpy.zeros(0)
        dy = numpy.zeros(0)
        dx = numpy.zeros(0)
        # Temporary copies used for sorting at the end.
        tx = numpy.zeros(0)
        ty = numpy.zeros(0)
        tdy = numpy.zeros(0)
        tdx = numpy.zeros(0)
        output = Data1D(x, y, dy=dy, dx=dx)
        self.filename = output.filename = basename

        # Optional converters normalizing units to 1/A and 1/cm.
        data_conv_q = None
        data_conv_i = None
        if has_converter and output.x_unit != '1/A':
            data_conv_q = Converter('1/A')
            # Test it
            data_conv_q(1.0, output.x_unit)
        if has_converter and output.y_unit != '1/cm':
            data_conv_i = Converter('1/cm')
            # Test it
            data_conv_i(1.0, output.y_unit)

        # The first good line of data defines whether we have
        # 2-, 3- or 4-column ascii.
        has_error_dx = None
        has_error_dy = None

        # Header/data heuristics: a run of more than `mum_data_lines`
        # consistent numeric lines is accepted as actual data.
        is_data = False
        mum_data_lines = 5
        i = -1    # counts the current run of data-candidate lines
        i1 = -1   # counts total previous data-candidate lines
        j = -1    # counts the current run of header lines
        j1 = -1   # helper for counting header lines
        # Minimum required number of columns of data (<= 4).
        lentoks = 2

        for line in lines:
            # Try CSV (","), then SCSV (";"), then whitespace splitting.
            toks = line.split(',')
            if len(toks) < 2:
                toks = line.split(';')
            if len(toks) < 2:
                toks = line.split()
            try:
                # Make sure that all columns are numbers; any failure
                # jumps to the header-line accounting below.
                for colnum in range(len(toks)):
                    float(toks[colnum])
                _x = float(toks[0])
                _y = float(toks[1])

                # Reset the header line counters.
                if j == j1:
                    j = 0
                    j1 = 0
                # More than two consecutive candidates: treat as real data.
                if i > 1:
                    is_data = True

                if data_conv_q is not None:
                    _x = data_conv_q(_x, units=output.x_unit)
                if data_conv_i is not None:
                    _y = data_conv_i(_y, units=output.y_unit)

                # If we have an extra token, check whether it can be
                # interpreted as a third column (dy).
                _dy = None
                if len(toks) > 2:
                    try:
                        _dy = float(toks[2])
                        if data_conv_i is not None:
                            _dy = data_conv_i(_dy, units=output.y_unit)
                    except Exception:
                        # The third column is not a float, skip it.
                        pass
                # If we haven't set the 3rd column flag, set it now.
                if has_error_dy is None:
                    has_error_dy = _dy is not None

                # Check for dx in a 4th column.
                _dx = None
                if len(toks) > 3:
                    try:
                        _dx = float(toks[3])
                        if data_conv_i is not None:
                            _dx = data_conv_i(_dx, units=output.x_unit)
                    except Exception:
                        # The 4th column is not a float, skip it.
                        pass
                if has_error_dx is None:
                    has_error_dx = _dx is not None

                # Only up to 4 columns of data are handled for now.
                new_lentoks = len(toks)
                # A change in column count means the previous candidate
                # lines were not data (unless real data already started).
                if lentoks != new_lentoks:
                    if is_data:
                        break
                    else:
                        i = -1
                        i1 = 0
                        j = -1
                        j1 = -1

                # Discard previously stored candidate lines when they
                # turned out not to be data. None of the values read here
                # changes again before the appends below, so the condition
                # is evaluated once per line.
                reset_candidates = (i < 0 and -1 < i1 < mum_data_lines
                                    and not is_data)
                if reset_candidates:
                    x = numpy.zeros(0)
                    y = numpy.zeros(0)
                x = numpy.append(x, _x)
                y = numpy.append(y, _y)
                if has_error_dy:
                    if reset_candidates:
                        dy = numpy.zeros(0)
                    dy = numpy.append(dy, _dy)
                if has_error_dx:
                    if reset_candidates:
                        dx = numpy.zeros(0)
                    dx = numpy.append(dx, _dx)

                # Same bookkeeping for the temporary (sorting) copies.
                if reset_candidates:
                    tx = numpy.zeros(0)
                    ty = numpy.zeros(0)
                tx = numpy.append(tx, _x)
                ty = numpy.append(ty, _y)
                if has_error_dy:
                    if reset_candidates:
                        tdy = numpy.zeros(0)
                    tdy = numpy.append(tdy, _dy)
                if has_error_dx:
                    if reset_candidates:
                        tdx = numpy.zeros(0)
                    tdx = numpy.append(tdx, _dx)

                # If the column count grew, restart the candidate total.
                if lentoks < new_lentoks:
                    if not is_data:
                        i1 = -1
                # Remember the column count for the next line of data.
                lentoks = len(toks)
                # Update candidate counters once past the header.
                if j == 0 and j1 == 0:
                    i1 = i + 1
                i += 1
            except Exception:
                # Non-numeric line: stop reading if real data already
                # started, otherwise count it as a header line.
                if is_data:
                    break
                lentoks = 2
                j += 1
                if j == j1 + 1:
                    j1 = j
                else:
                    j = -1
                # Reset the count of data-candidate lines.
                i = -1

        # Sanity check
        if has_error_dy and len(y) != len(dy):
            msg = "ascii_reader: y and dy have different length"
            raise RuntimeError(msg)
        if has_error_dx and len(x) != len(dx):
            # Message fixed: it previously (wrongly) referred to y/dy.
            msg = "ascii_reader: x and dx have different length"
            raise RuntimeError(msg)
        # If the data length is zero, consider this as
        # though we were not able to read the file.
        if len(x) == 0:
            raise RuntimeError("ascii_reader: could not load file")

        # Re-order the data by ascending Q so the plotted curve looks
        # sane. lexsort returns a permutation of all indices, so the
        # original element-wise copy loop is equivalent to a gather.
        ind = numpy.lexsort((ty, tx))
        x = tx[ind]
        y = ty[ind]
        if has_error_dy:
            dy = tdy[ind]
        if has_error_dx:
            dx = tdx[ind]

        # Replace exact zeros in dx/dy with a tiny value to avoid
        # divide-by-zero downstream.
        if has_error_dx:
            dx[dx == 0] = _ZERO
        if has_error_dy:
            dy[dy == 0] = _ZERO

        # Data: drop points with Q == 0.
        nonzero = x != 0
        output.x = x[nonzero]
        output.y = y[nonzero]
        output.dy = dy[nonzero] if has_error_dy \
            else numpy.zeros(len(output.y))
        output.dx = dx[nonzero] if has_error_dx \
            else numpy.zeros(len(output.x))

        if data_conv_q is not None:
            output.xaxis("\\rm{Q}", output.x_unit)
        else:
            output.xaxis("\\rm{Q}", 'A^{-1}')
        if data_conv_i is not None:
            output.yaxis("\\rm{Intensity}", output.y_unit)
        else:
            output.yaxis("\\rm{Intensity}", "cm^{-1}")
        # Store loading process information
        output.meta_data['loader'] = self.type_name
        if len(output.x) < 1:
            raise RuntimeError("%s is empty" % path)
        return output