import csv
import datetime
import glob
import os
import pickle
import sys
import time

import numpy
import scipy.io

import helperFunctions


def readData(configFile):
    """Load the dataset named in the config file and sanity-check it.

    The dataset type is read from the ``dataset`` key of the ``data``
    section. Only ``'uci'`` is supported; it is loaded via ``readUCIcsv``.

    Args:
        configFile: Config handle/path forwarded to
            ``helperFunctions.getConfig``.

    Returns:
        Tuple ``(x, y)``: feature matrix and label matrix with the same
        number of rows, all entries finite.

    Raises:
        Exception: If the dataset type is unknown, no data rows were read,
            the row counts of ``x`` and ``y`` differ, or either contains
            non-finite values.
    """
    dataset = helperFunctions.getConfig(configFile, 'data', 'dataset', None, 'str', True)

    if dataset == 'uci':
        x, y = readUCIcsv(configFile)
    else:
        raise Exception('Unknown dataset %s!' % dataset)

    # readUCIcsv yields None when the file holds no data rows; fail with a
    # clear message instead of an AttributeError on `.shape` below.
    if x is None or y is None:
        raise Exception('No data rows were read for dataset %s!' % dataset)

    if x.shape[0] != y.shape[0]:
        raise Exception('#data = {} != #labels = {}'.format(x.shape[0], y.shape[0]))

    if not numpy.all(numpy.isfinite(x)):
        raise Exception('not numpy.all(numpy.isfinite(x))')

    if not numpy.all(numpy.isfinite(y)):
        raise Exception('not numpy.all(numpy.isfinite(y))')

    return x, y


def readUCIcsv(configFile):
    """Read a numeric CSV file into a feature matrix and a label matrix.

    All settings come from the ``data`` section of the config file:
    ``dataFileName``, ``labelCol``, ``forbiddenCols``, ``delimiter``,
    ``quoteChar``, ``firstDataRowNumber`` and ``normalizeFeatures``.

    The first ``firstDataRowNumber`` rows are skipped. In every remaining
    row, columns listed in ``forbiddenCols`` are ignored, column
    ``labelCol`` becomes the label, and all other columns become features.
    Every used cell is converted with ``float``. Blank rows are skipped.

    Args:
        configFile: Config handle/path forwarded to
            ``helperFunctions.getConfig``.

    Returns:
        Tuple ``(x, y)`` of ``numpy.matrix`` objects with dtype float:
        ``x`` has one row per data row, ``y`` has shape ``(rows, 1)``.
        Both are ``None`` if the file contains no data rows (``x`` is still
        passed through the configured normalization in that case).

    Raises:
        ValueError: If a cell cannot be converted to float, a data row has
            no label column (too short, or ``labelCol`` is forbidden), or a
            data row has a different number of feature columns than the
            first data row.
    """
    dataFileName = helperFunctions.getConfig(configFile, 'data', 'dataFileName', None, 'str', True)
    labelCol = helperFunctions.getConfig(configFile, 'data', 'labelCol', None, 'int', True)
    forbiddenCols = helperFunctions.getConfig(configFile, 'data', 'forbiddenCols', None, 'intList', True)
    delimiter = helperFunctions.getConfig(configFile, 'data', 'delimiter', ',', 'str', True)
    quoteChar = helperFunctions.getConfig(configFile, 'data', 'quoteChar', '|', 'str', True)
    firstDataRowNumber = helperFunctions.getConfig(configFile, 'data', 'firstDataRowNumber', 1, 'int', True)
    normalizeFeatures = helperFunctions.getConfig(configFile, 'data', 'normalizeFeatures', -1, 'str', True)

    # An empty delimiter setting is interpreted as "space separated".
    if delimiter == '':
        delimiter = ' '

    # Build the lookup once; None means "no forbidden columns".
    skipCols = frozenset(forbiddenCols) if forbiddenCols is not None else frozenset()

    xRows = []
    yVals = []

    # The csv module needs a text-mode file opened with newline='' on
    # Python 3; the context manager closes it even if parsing fails.
    with open(dataFileName, 'r', newline='') as csvFile:
        csvReader = csv.reader(csvFile, delimiter=delimiter, quotechar=quoteChar)

        for rowIdx, row in enumerate(csvReader):
            # Skip the leading header rows.
            if rowIdx < firstDataRowNumber:
                continue

            # Blank lines carry no data.
            if not row:
                continue

            xRow = []
            yRow = None
            for colIdx, cell in enumerate(row):
                # Forbidden columns win over the label column, as before.
                if colIdx in skipCols:
                    continue
                if colIdx == labelCol:
                    yRow = float(cell)
                else:
                    xRow.append(float(cell))

            # Never fall back to the previous row's label.
            if yRow is None:
                raise ValueError(
                    'row {} of {} has no label column {}'.format(rowIdx, dataFileName, labelCol))

            if xRows and len(xRow) != len(xRows[0]):
                raise ValueError(
                    'row {} of {} has {} feature columns, expected {}'.format(
                        rowIdx, dataFileName, len(xRow), len(xRows[0])))

            xRows.append(xRow)
            yVals.append(yRow)

    if yVals:
        # One conversion at the end instead of a quadratic append per row.
        x = numpy.asmatrix(xRows, dtype=float)
        y = numpy.asmatrix(numpy.array(yVals, dtype=float).reshape(-1, 1))
    else:
        x = None
        y = None

    if normalizeFeatures == 'uci':
        x = helperFunctions.normalizeUCI(x)
    else:
        # Any other setting must be integer-like; positive values select
        # normalizeLP with that value, everything else leaves x untouched.
        normalizeArg = int(normalizeFeatures)
        if normalizeArg > 0:
            x = helperFunctions.normalizeLP(x, normalizeArg)

    return x, y