Source code for sas.sascalc.dataloader.readers.ascii_reader

"""
    ASCII reader
"""
############################################################################
#This software was developed by the University of Tennessee as part of the
#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
#project funded by the US National Science Foundation. 
#If you use DANSE applications to do scientific research that leads to
#publication, we ask that you acknowledge the use of the software with the
#following sentence:
#This work benefited from DANSE software developed under NSF award DMR-0520547.
#copyright 2008, University of Tennessee
#############################################################################


import numpy
import os
from sas.sascalc.dataloader.data_info import Data1D

# Check whether we have a converter available
has_converter = True
try:
    from sas.sascalc.data_util.nxsunit import Converter
except:
    has_converter = False
_ZERO = 1e-16


[docs]class Reader: """ Class to load ascii files (2, 3 or 4 columns). """ ## File type type_name = "ASCII" ## Wildcards type = ["ASCII files (*.txt)|*.txt", "ASCII files (*.dat)|*.dat", "ASCII files (*.abs)|*.abs", "CSV files (*.csv)|*.csv"] ## List of allowed extensions ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', 'csv', 'CSV'] ## Flag to bypass extension check allow_all = True
[docs] def read(self, path): """ Load data file :param path: file path :return: Data1D object, or None :raise RuntimeError: when the file can't be opened :raise ValueError: when the length of the data vectors are inconsistent """ if os.path.isfile(path): basename = os.path.basename(path) _, extension = os.path.splitext(basename) if self.allow_all or extension.lower() in self.ext: try: # Read in binary mode since GRASP frequently has no-ascii # characters that breaks the open operation input_f = open(path,'rb') except: raise RuntimeError, "ascii_reader: cannot open %s" % path buff = input_f.read() lines = buff.splitlines() # Arrays for data storage tx = numpy.zeros(0) ty = numpy.zeros(0) tdy = numpy.zeros(0) tdx = numpy.zeros(0) # The first good line of data will define whether # we have 2-column or 3-column ascii has_error_dx = None has_error_dy = None #Initialize counters for data lines and header lines. is_data = False # More than "5" lines of data is considered as actual # data unless that is the only data min_data_pts = 5 # To count # of current data candidate lines candidate_lines = 0 # To count total # of previous data candidate lines candidate_lines_previous = 0 #minimum required number of columns of data lentoks = 2 for line in lines: toks = self.splitline(line) # To remember the # of columns in the current line of data new_lentoks = len(toks) try: if new_lentoks == 1 and not is_data: ## If only one item in list, no longer data raise ValueError elif new_lentoks == 0: ## If the line is blank, skip and continue on ## In case of breaks within data sets. continue elif new_lentoks != lentoks and is_data: ## If a footer is found, break the loop and save the data break elif new_lentoks != lentoks and not is_data: ## If header lines are numerical candidate_lines = 0 candidate_lines_previous = 0 #Make sure that all columns are numbers. for colnum in range(len(toks)): # Any non-floating point values throw ValueError float(toks[colnum]) candidate_lines += 1 _x = float(toks[0]) _y = float(toks[1]) _dx = None _dy = None #If 5 or more lines, this is considering the set data if candidate_lines >= min_data_pts: is_data = True # If a 3rd row is present, consider it dy if new_lentoks > 2: _dy = float(toks[2]) has_error_dy = False if _dy == None else True # If a 4th row is present, consider it dx if new_lentoks > 3: _dx = float(toks[3]) has_error_dx = False if _dx == None else True # Delete the previously stored lines of data candidates if # the list is not data if candidate_lines == 1 and -1 < candidate_lines_previous < min_data_pts and \ is_data == False: try: tx = numpy.zeros(0) ty = numpy.zeros(0) tdy = numpy.zeros(0) tdx = numpy.zeros(0) except: pass if has_error_dy == True: tdy = numpy.append(tdy, _dy) if has_error_dx == True: tdx = numpy.append(tdx, _dx) tx = numpy.append(tx, _x) ty = numpy.append(ty, _y) #To remember the # of columns on the current line # for the next line of data lentoks = new_lentoks candidate_lines_previous = candidate_lines except ValueError: # It is data and meet non - number, then stop reading if is_data == True: break lentoks = 2 has_error_dx = None has_error_dy = None #Reset # of lines of data candidates candidate_lines = 0 except: pass input_f.close() if not is_data: msg = "ascii_reader: x has no data" raise RuntimeError, msg # Sanity check if has_error_dy == True and not len(ty) == len(tdy): msg = "ascii_reader: y and dy have different length" raise RuntimeError, msg if has_error_dx == True and not len(tx) == len(tdx): msg = "ascii_reader: y and dy have different length" raise RuntimeError, msg # If the data length is zero, consider this as # though we were not able to read the file. if len(tx) == 0: raise RuntimeError, "ascii_reader: could not load file" #Let's re-order the data to make cal. # curve look better some cases ind = numpy.lexsort((ty, tx)) x = numpy.zeros(len(tx)) y = numpy.zeros(len(ty)) dy = numpy.zeros(len(tdy)) dx = numpy.zeros(len(tdx)) output = Data1D(x, y, dy=dy, dx=dx) self.filename = output.filename = basename for i in ind: x[i] = tx[ind[i]] y[i] = ty[ind[i]] if has_error_dy == True: dy[i] = tdy[ind[i]] if has_error_dx == True: dx[i] = tdx[ind[i]] # Zeros in dx, dy if has_error_dx: dx[dx == 0] = _ZERO if has_error_dy: dy[dy == 0] = _ZERO #Data output.x = x[x != 0] output.y = y[x != 0] output.dy = dy[x != 0] if has_error_dy == True\ else numpy.zeros(len(output.y)) output.dx = dx[x != 0] if has_error_dx == True\ else numpy.zeros(len(output.x)) output.xaxis("\\rm{Q}", 'A^{-1}') output.yaxis("\\rm{Intensity}", "cm^{-1}") # Store loading process information output.meta_data['loader'] = self.type_name if len(output.x) < 1: raise RuntimeError, "%s is empty" % path return output else: raise RuntimeError, "%s is not a file" % path return None
[docs] def splitline(self, line): """ Splits a line into pieces based on common delimeters :param line: A single line of text :return: list of values """ # Initial try for CSV (split on ,) toks = line.split(',') # Now try SCSV (split on ;) if len(toks) < 2: toks = line.split(';') # Now go for whitespace if len(toks) < 2: toks = line.split() return toks