Source code for sas.dataloader.readers.ascii_reader

"""
    ASCII reader
"""
############################################################################
#This software was developed by the University of Tennessee as part of the
#Distributed Data Analysis of Neutron Scattering Experiments (DANSE)
#project funded by the US National Science Foundation. 
#If you use DANSE applications to do scientific research that leads to
#publication, we ask that you acknowledge the use of the software with the
#following sentence:
#This work benefited from DANSE software developed under NSF award DMR-0520547.
#copyright 2008, University of Tennessee
#############################################################################


import numpy
import os
from sas.dataloader.data_info import Data1D

# Check whether the optional unit converter is available.  Only a failed
# import means "no converter"; interrupts and interpreter exits must not be
# swallowed here.
try:
    from sas.data_util.nxsunit import Converter
    has_converter = True
except ImportError:
    has_converter = False

# Tiny positive value substituted for exact zeros in the dx/dy columns
# by Reader.read.
_ZERO = 1e-16


class Reader:
    """
    Class to load ascii files (2, 3 or 4 columns).

    Columns are taken, in order, as x (Q), y (intensity), dy and dx.
    Fields may be separated by commas, semicolons or whitespace.
    """
    ## File type
    type_name = "ASCII"

    ## Wildcards
    type = ["ASCII files (*.txt)|*.txt",
            "ASCII files (*.dat)|*.dat",
            "ASCII files (*.abs)|*.abs",
            "CSV files (*.csv)|*.csv"]
    ## List of allowed extensions (with the leading dot, i.e. in the form
    ## returned by os.path.splitext)
    ext = ['.txt', '.TXT', '.dat', '.DAT', '.abs', '.ABS', '.csv', '.CSV']

    ## Flag to bypass extension check
    allow_all = True

    def read(self, path):
        """
        Load data file

        Lines whose columns are not all numeric are treated as header
        lines.  Short runs of numeric lines found before the real data
        block are discarded, and reading stops at the first non-numeric
        line (or change in column count) once a data block has been
        recognised.  The loaded points are sorted by x (then y) and
        points with x == 0 are dropped.

        :param path: file path

        :return: Data1D object, or None when the extension is not in
            ``ext`` and ``allow_all`` is False

        :raise RuntimeError: when ``path`` is not a file, the file can't
            be opened, no data could be read from it, or the lengths of
            the data vectors are inconsistent
        """
        if not os.path.isfile(path):
            raise RuntimeError("%s is not a file" % path)

        basename = os.path.basename(path)
        _, extension = os.path.splitext(basename)
        if not (self.allow_all or extension.lower() in self.ext):
            return None

        try:
            # Read in binary mode since GRASP frequently has non-ascii
            # characters that break the open operation
            input_f = open(path, 'rb')
        except (IOError, OSError):
            raise RuntimeError("ascii_reader: cannot open %s" % path)
        try:
            buff = input_f.read()
        finally:
            # Everything is in memory now; release the handle even if
            # the read itself failed.
            input_f.close()

        # On Python 3 a binary read returns bytes.  latin-1 maps every
        # byte to a character, so this decode cannot fail; stray
        # non-ascii characters simply make their line non-numeric.
        if not isinstance(buff, str):
            buff = buff.decode('latin-1')

        lines = buff.split('\n')
        # Some ascii data uses a bare \r as line separator:
        # try it when the data ended up on only one long line
        if len(lines) < 2:
            lines = buff.split('\r')

        x = numpy.zeros(0)
        y = numpy.zeros(0)
        dy = numpy.zeros(0)
        dx = numpy.zeros(0)

        output = Data1D(x, y, dy=dy, dx=dx)
        self.filename = output.filename = basename

        data_conv_q = None
        data_conv_i = None

        if has_converter and output.x_unit != '1/A':
            data_conv_q = Converter('1/A')
            # Test it
            data_conv_q(1.0, output.x_unit)

        if has_converter and output.y_unit != '1/cm':
            data_conv_i = Converter('1/cm')
            # Test it
            data_conv_i(1.0, output.y_unit)

        # The first good line of data will define whether
        # we have 2-column, 3-column or 4-column ascii
        has_error_dx = None
        has_error_dy = None

        # Becomes True once i (below) exceeds 1; from then on a
        # non-numeric line or a change in column count ends the read.
        is_data = False
        # Runs of candidate lines shorter than this are thrown away
        # when a new run starts, unless is_data is already set
        min_data_lines = 5
        # To count # of current data candidate lines
        i = -1
        # To count total # of previous data candidate lines
        i1 = -1
        # To count # of header lines
        j = -1
        # Helps to count # of header lines
        j1 = -1
        # Number of columns on the previous candidate line ( <= 4 used).
        lentoks = 2
        for line in lines:
            # Initial try for CSV (split on ,)
            toks = line.split(',')
            # Now try SCSV (split on ;)
            if len(toks) < 2:
                toks = line.split(';')
            # Now go for whitespace
            if len(toks) < 2:
                toks = line.split()
            try:
                # Make sure that all columns are numbers.
                for tok in toks:
                    float(tok)

                _x = float(toks[0])
                _y = float(toks[1])

                # Reset the header line counters
                if j == j1:
                    j = 0
                    j1 = 0

                if i > 1:
                    is_data = True

                if data_conv_q is not None:
                    _x = data_conv_q(_x, units=output.x_unit)

                if data_conv_i is not None:
                    _y = data_conv_i(_y, units=output.y_unit)

                # If we have an extra token, check whether it can be
                # interpreted as a third column (dy).
                _dy = None
                if len(toks) > 2:
                    try:
                        _dy = float(toks[2])
                        if data_conv_i is not None:
                            _dy = data_conv_i(_dy, units=output.y_unit)
                    except Exception:
                        # Conversion failed: keep whatever was parsed.
                        pass

                # If we haven't set the 3rd column flag, set it now.
                if has_error_dy is None:
                    has_error_dy = _dy is not None

                # Check for dx (4th column).  dx is expressed in the
                # units of x, so it goes through the Q converter.
                _dx = None
                if len(toks) > 3:
                    try:
                        _dx = float(toks[3])
                        if data_conv_q is not None:
                            _dx = data_conv_q(_dx, units=output.x_unit)
                    except Exception:
                        # Conversion failed: keep whatever was parsed.
                        pass

                # If we haven't set the 4th column flag, set it now.
                if has_error_dx is None:
                    has_error_dx = _dx is not None

                # Only the first 4 columns of data are used for now.
                # Number of columns on the current line
                new_lentoks = len(toks)

                # If the previous column count differs from the current
                # one, mark the previous lines as non-data and reset the
                # dependents.
                if lentoks != new_lentoks:
                    if is_data:
                        break
                    i = -1
                    i1 = 0
                    j = -1
                    j1 = -1

                # Delete the previously stored data candidates if they
                # turned out not to be data.  dy/dx are only ever
                # non-empty when their flag is set, so clearing them
                # unconditionally is safe.
                if i < 0 and -1 < i1 < min_data_lines and not is_data:
                    x = numpy.zeros(0)
                    y = numpy.zeros(0)
                    dy = numpy.zeros(0)
                    dx = numpy.zeros(0)

                x = numpy.append(x, _x)
                y = numpy.append(y, _y)
                if has_error_dy:
                    dy = numpy.append(dy, _dy)
                if has_error_dx:
                    dx = numpy.append(dx, _dx)

                # Reset i1 when the column count grew on a non-data line
                if lentoks < new_lentoks and not is_data:
                    i1 = -1
                # Remember the # of columns on the current line
                # for the next line of data
                lentoks = new_lentoks

                # Reset # of header lines and count #
                # of data candidate lines
                if j == 0 and j1 == 0:
                    i1 = i + 1
                i += 1
            except Exception:
                # Couldn't parse this line as numbers.
                # If we are in the data block, stop reading
                if is_data:
                    break
                lentoks = 2
                # Counting # of header lines
                j += 1
                if j == j1 + 1:
                    j1 = j
                else:
                    j = -1
                # Reset # of lines of data candidates
                i = -1

        # Sanity check
        if has_error_dy and len(y) != len(dy):
            msg = "ascii_reader: y and dy have different length"
            raise RuntimeError(msg)
        if has_error_dx and len(x) != len(dx):
            msg = "ascii_reader: x and dx have different length"
            raise RuntimeError(msg)
        # If the data length is zero, consider this as
        # though we were not able to read the file.
        if len(x) == 0:
            raise RuntimeError("ascii_reader: could not load file")

        # Re-order the data by x (ties broken by y) so that calculated
        # curves look better in some cases.  lexsort sorts on the LAST
        # key first, hence (y, x).
        order = numpy.lexsort((y, x))
        x = x[order]
        y = y[order]
        if has_error_dy:
            dy = dy[order]
        if has_error_dx:
            dx = dx[order]

        # Zeros in dx, dy
        if has_error_dx:
            dx[dx == 0] = _ZERO
        if has_error_dy:
            dy[dy == 0] = _ZERO

        # Data: drop the points where x is exactly zero
        keep = x != 0
        output.x = x[keep]
        output.y = y[keep]
        if has_error_dy:
            output.dy = dy[keep]
        else:
            output.dy = numpy.zeros(len(output.y))
        if has_error_dx:
            output.dx = dx[keep]
        else:
            output.dx = numpy.zeros(len(output.x))

        if data_conv_q is not None:
            output.xaxis("\\rm{Q}", output.x_unit)
        else:
            output.xaxis("\\rm{Q}", 'A^{-1}')
        if data_conv_i is not None:
            output.yaxis("\\rm{Intensity}", output.y_unit)
        else:
            output.yaxis("\\rm{Intensity}", "cm^{-1}")

        # Store loading process information
        output.meta_data['loader'] = self.type_name
        if len(output.x) < 1:
            raise RuntimeError("%s is empty" % path)
        return output
Navigation

Source code for sas.dataloader.readers.ascii_reader

Quick search

Navigation