# Source code for scitools.filetable

#!/usr/bin/env python
"""
Read tabular data from file into NumPy arrays and vice versa.

This module provides functions for
1) reading row-column table data from file into NumPy arrays, and
2) writing two-dimensional NumPy arrays to file in a table fashion.

  - read: Load a table with numbers into a two-dim. NumPy array.

  - write: Write a two-dim. NumPy array a in tabular form.

  - read_columns:
    As read, but the columns are returned as separate arrays instead
    of a two-dimensional array.

  - write_columns:
    As write, but the arguments are comma-separated one-dimensional
    arrays, one for each column, instead of a two-dimensional array.

The file format requires the same number of "words" (numbers)
on each line. Comment lines are allowed, but a blank line
indicates a delimiter in the data set, and lines after the blank
line will not be read.

Example: Assume we have a data file `tmp.dat` with the numbers::

  0        0.0        0.0        1.0
  1        1.0        1.0        2.0
  2        4.0        8.0       17.0
  3        9.0       27.0       82.0
  4       16.0       64.0      257.0
  5       25.0      125.0      626.0

The following session demonstrates key functions in this module::

    >>> import scitools.filetable as ft
    >>> s = open('tmp.dat', 'r')
    >>> table = ft.read(s)
    >>> s.close()
    >>> print table
    [[   0.    0.    0.    1.]
     [   1.    1.    1.    2.]
     [   2.    4.    8.   17.]
     [   3.    9.   27.   82.]
     [   4.   16.   64.  257.]
     [   5.   25.  125.  626.]]

    >>>
    >>> s = open('tmp.dat', 'r')
    >>> x, y1, y2, y3 = ft.read_columns(s)
    >>> s.close()
    >>> print x
    [ 0.  1.  2.  3.  4.  5.]
    >>> print y1
    [  0.   1.   4.   9.  16.  25.]
    >>> print y2
    [   0.    1.    8.   27.   64.  125.]
    >>> print y3
    [   1.    2.   17.   82.  257.  626.]
    >>>
    >>> s = open('tmp2.dat','w')
    >>> ft.write(s, table)
    >>> s.close()

The `tmp2.dat` file looks as follows::

    0       0       0       1
    1       1       1       2
    2       4       8       17
    3       9       27      82
    4       16      64      257
    5       25      125     626

"""
# author: Hans Petter Langtangen <hpl@ifi.uio.no>

import sys, os, re
from numpy import *

__all__ = ['read', 'read_columns', 'readfile',
           'write', 'write_columns',]

# simple version (not as effective as function read):
def read_v1(fileobj, commentchar='#'):
    """
    Load a table with numbers into a two-dim. NumPy array.

    Simple line-by-line reader: every data line is split into words
    and each word is converted with float().

    @param fileobj: open file object; reading starts at the current
    position and stops after the first blank line (or at end of file),
    so the function may be called several times on the same file.
    @param commentchar: lines starting with this string are skipped.
    The marker may be longer than one character; an empty string
    disables comment skipping.
    @return: NumPy array built from the list of rows (an empty array
    if no data lines were found).
    """
    rows = []  # rows[i]: list of the numbers in the i-th data line
    while True:
        line = fileobj.readline()
        if not line or line.isspace():
            break  # end of file, or blank line (data set delimiter)
        # startswith (instead of comparing line[0]) lets multi-character
        # comment markers work, as they do in function read
        if commentchar and line.startswith(commentchar):
            continue
        rows.append([float(word) for word in line.split()])
    return array(rows)


def read(fileobj, commentchar='#'):
    """
    Load a table with numbers into a two-dim. NumPy array.

    Reading starts at the current file position and stops right after
    the first blank line (or at end of file). The file is left
    positioned after that blank line, so the function can be called
    repeatedly to load several blank-line separated arrays.

    @param fileobj: open file object.
    @param commentchar: lines starting with commentchar are skipped
    (a blank line is an array data delimiter and stops reading).
    An empty string disables comment skipping.
    @return: two-dimensional (row-column) NumPy array, or None if no
    data line was found before the blank line/end of file.
    @raise ValueError: if a word is not a number, or if the total
    number of words is not a multiple of the number of words on the
    first data line.
    """
    # based on a version by Mario Pernici <Mario.Pernici@mi.infn.it>
    datalines = []   # raw text of the data lines of this array
    ncolumns = 0     # no of words on the first data line
    while True:
        line = fileobj.readline()
        if not line or line.isspace():
            break  # end of file, or blank line (array delimiter)
        if commentchar and line.startswith(commentchar):
            continue  # treat next line
        if not datalines:
            ncolumns = len(line.split())
        datalines.append(line)

    if not datalines:
        return None

    # one split and one conversion pass over all the collected text
    values = [float(word) for word in ''.join(datalines).split()]
    # floor division keeps the row count an integer on Python 3 too;
    # reshape raises ValueError if the lines had unequal word counts
    return array(values).reshape(len(values)//ncolumns, ncolumns)


def read_columns(fileobj, commentchar='#'):
    """
    As read, but return the columns as separate arrays.

    @param fileobj: open file object, passed on to read.
    @param commentchar: comment marker, passed on to read.
    @return: list with one one-dimensional array per table column.
    No special handling is done when read returns None (no data);
    the attribute access on None then raises AttributeError.
    """
    table = read(fileobj, commentchar)
    ncolumns = table.shape[1]
    return [table[:, j] for j in range(ncolumns)]

# for backward compatibility:
def readfile(filename, commentchar='#'):
    """
    As read, but a filename (and not a file object) can be given.

    @param filename: name of the file to open for reading.
    @param commentchar: comment marker, passed on to read_columns.
    @return: columns as separate arrays (list of one-dim. arrays).
    """
    f = open(filename, 'r')
    try:
        return read_columns(f, commentchar)
    finally:
        # always release the file handle, also if parsing fails
        f.close()



# simple write version:
def write_v1(fileobj, a):
    """
    Write a two-dim. NumPy array a in tabular form to fileobj.

    Each element is converted with str() and followed by a tab
    (so every line ends with a tab before the newline).

    @param fileobj: file object opened for writing.
    @param a: two-dimensional array.
    @raise TypeError: if a is not two-dimensional.
    """
    if len(a.shape) != 2:
        raise TypeError("a 2D array is required, shape now is "+str(a.shape))
    nrows, ncolumns = a.shape[0], a.shape[1]
    for row in range(nrows):
        cells = [str(a[row, col]) + "\t" for col in range(ncolumns)]
        fileobj.write("".join(cells) + "\n")

# faster write version:
def write_v2(fileobj, a):
    """
    Write a two-dim. NumPy array a in tabular form to fileobj.

    Every row is written as tab-separated '%g' formatted numbers
    followed by a newline.

    @param fileobj: file object opened for writing.
    @param a: two-dimensional array with at least one column.
    """
    # written by Mario Pernici <Mario.Pernici@mi.infn.it>
    # One format string covers any number of columns (the former
    # hand-written 1, 2 and 3 column cases gave the same output).
    str_fmt = '%g\t'*(a.shape[1] - 1) + '%g\n'
    # iterate over the rows directly; no xrange, which Python 3 lacks
    for row in a:
        fileobj.write(str_fmt % tuple(row))

    # don't think about
    # out = str(a).replace('[','').replace(',', '\t')
    # fileobj.write(out + '\n')
    # the str(a) conversion is *extremely* expensive

def write_v3(fileobj, a):
    """
    Short and fast version, compared to write_v1 and write_v2.

    Builds one format string for the whole table and writes the
    complete output with a single write call.
    """
    # written by Mario Pernici <Mario.Pernici@mi.infn.it>
    nrows = a.shape[0]
    ncolumns = a.shape[1]
    row_format = '%g\t'*(ncolumns-1) + '%g\n'
    table_format = row_format*nrows
    numbers = tuple(ravel(a))  # all elements, row by row
    fileobj.write(table_format % numbers)

def write(fileobj, a):
    """
    Write a two-dim. NumPy array a in tabular form to fileobj.

    Each row becomes one line of tab-separated '%g' formatted numbers.
    Rows are formatted in chunks of 512 with one big format string to
    keep the number of formatting operations and write calls low.

    @param fileobj: file object opened for writing.
    @param a: two-dimensional array.
    @raise TypeError: if a is not two-dimensional.
    """
    # fastest version (of the write family of functions) so far...
    # written by Mario Pernici <Mario.Pernici@mi.infn.it>

    if len(a.shape) != 2:
        raise TypeError("a 2D array is required, shape now is "+str(a.shape))
    N = 512  # no of rows formatted per write call
    shape0 = a.shape[0]
    shape1 = a.shape[1]
    str_fmt = '%g\t'*(shape1 - 1) + '%g\n'
    # use a big format string
    str_fmt_N = str_fmt * N
    nchunks = shape0 // N  # floor division: an integer on Python 3 too
    for i in range(nchunks):
        # chunk no. i covers rows i*N, ..., (i+1)*N - 1
        # (slicing a[i:i+N] here would write overlapping, wrong rows)
        a1 = a[i*N:(i+1)*N, :]
        # put a1 in  1D array form; ravel better than reshape for
        # non-contiguous arrays.
        fileobj.write(str_fmt_N % tuple(ravel(a1)))
    # the remaining shape0 % N rows that do not fill a complete chunk
    for i in range(nchunks*N, shape0):
        fileobj.write(str_fmt % tuple(a[i]))


def write_columns(fileobj, *columns):
    """
    As write, but the column data are represented as one-dimensional
    arrays.

    @param fileobj: file object opened for writing.
    @param columns: one sequence of numbers per table column (the
    columns are expected to have equal length so that they form a
    two-dimensional array).
    """
    # array(columns) has one column per row; transpose to row-column form
    table = array(columns).transpose()
    write(fileobj, table)
    
    
# testing:
def _generate(nrows, ncolumns, filename):
    """
    Write a test file with three identical arrays.

    The file starts with a comment header stating the number of
    arrays. Each array has its own comment header and is followed by
    a blank line (the array delimiter). Element (i,j) equals i+j*j.

    @param nrows: number of rows in each array.
    @param ncolumns: number of columns in each array.
    @param filename: name of the file to (over)write.
    """
    a = fromfunction(lambda i,j: i+j*j, (nrows,ncolumns))
    narrays = 3  # no of arrays in the file
    f = open(filename, 'w')
    try:
        f.write('# %d arrays in this file\n#\n' % narrays)
        for i in range(narrays):
            f.write('# a new array with %d rows:  \n' % nrows)
            f.write('#\n')
            write(f, a)
            f.write('\n')  # array delimiter
    finally:
        # make sure the data are flushed before the file is read back
        f.close()

def _test(n):
    """
    Time read and write on a generated file with n rows per array.

    Writes the files tmp.1 and tmp.2 in the current directory and
    prints the timings. If the TableIO package can be imported, its
    readColumns and writeArray functions are timed as well (writing
    tmp.3); otherwise the comparison is skipped silently.

    @param n: number of rows in each generated array.
    """
    _generate(n, 3, "tmp.1")
    import time
    # time.clock does not exist in newer Python versions; use
    # perf_counter when it is available
    timer = getattr(time, 'perf_counter', None) or time.clock

    t0 = timer()
    f = open("tmp.1", "r")
    try:
        # first line has the form '# <narrays> arrays in this file'
        narrays = int(f.readline().split()[1])
        print('found %d arrays in file' % narrays)
        for i in range(narrays):
            q = read(f)
            print('read an array with shape %s' % (q.shape,))
        t1 = timer()
        print("read: %s seconds\n" % (t1-t0))
    finally:
        f.close()

    t0 = timer()
    f = open("tmp.2", "w")
    try:
        write(f,q)
    finally:
        f.close()
    t1 = timer()
    print("write: %s seconds" % (t1-t0))

    # compare with TableIO:
    try:
        import TableIO.TableIO as TableIO
    except Exception:
        # optional package missing or not importable: skip silently
        return
    t0 = timer()
    p = TableIO.readColumns("tmp.1", "#")
    t1 = timer()
    print("TableIO.readColumns: %s seconds\n" % (t1-t0))
    t0 = timer()
    TableIO.writeArray("tmp.3", array(p))
    t1 = timer()
    print("TableIO.writeArray: %s seconds" % (t1-t0))

# Run the read/write timing test (100000 rows per array) when the
# module is executed as a script.
if __name__ == '__main__':
    _test(100000)
# Navigation
#
# Source code for scitools.filetable
#
# Quick search
#
# Navigation