# Re: find max and min values from a column of a csv file

prakash jp wrote:

Could anyone help me get the max and min values from a specified column of a
CSV file? The CSV file is large, and hence the code below did not work well.
Alternate methods would be highly appreciated.

minimum.py:
import csv
# Path of the CSV log file that col_min() scans.
filename = "testLog_4.csv"
# NOTE(review): this module-level handle is never read in the visible snippet;
# col_min() opens the file again itself. It is only closed by the final
# f.close() at the bottom of the script.
f = open(filename)
def col_min(mincname):
# Print the smallest value found in one column of the CSV file.
#
# mincname -- column header text (see the two calls below).
#
# NOTE(review): `reader` and `col_index` are not defined anywhere in the
# visible snippet -- presumably the lines that create a csv reader from `f`
# and look up the position of `mincname` were lost. Confirm against the
# original post before running this.
with open(filename, 'rb') as f:
#print mincname
#print col_index
# min() compares the fields exactly as the reader yields them; no numeric
# conversion is visible here, so if the fields are strings the result is the
# lexicographic minimum -- TODO confirm.
least = min(rec[col_index] for rec in reader)
print least

# Each call runs its own pass over the file, one per column.
col_min(str("Server Latency"))
col_min(str("Client Latency"))
f.close()

I don't see a way to make this much faster, as it is probably already
I/O-bound. You can try to calculate all four extrema at once, so that you
have to iterate over the file only once:

$ cat csv_minmax_chunked.py
#!/usr/bin/env python
import csv
import locale
from itertools import islice

def make_key(index, type):
    """Build a function that extracts and converts one field of a row.

    index -- position of the field inside each row sequence
    type  -- callable applied to the raw field value (e.g. int, float)

    Returns a one-argument callable ``key(row)`` that evaluates
    ``type(row[index])``.
    """
    def key(row):
        return type(row[index])
    return key

class Aggregator(object):
    """Track the running minimum and maximum of one column across chunks.

    name  -- label used by str() and repr()
    index -- position of the column inside each row
    type  -- callable converting the raw field; a falsy value means float
    key   -- optional ordering function forwarded to min()/max() as ``key=``

    The ``min`` and ``max`` attributes do not exist until the first
    non-empty chunk has been passed to update().
    """

    def __init__(self, name, index, type, key=None):
        self.name = name
        self.index = index
        self.type = type or float
        # Build the extractor from the *defaulted* converter so that a falsy
        # `type` really falls back to float instead of failing on first use.
        self.getvalue = make_key(index, self.type)
        # Stored as ready-to-unpack keyword arguments for min()/max().
        self.key = {"key": key} if key is not None else {}

    def __repr__(self):
        return "{0}(name={1}, index={2}, type={3})".format(
            type(self).__name__, self.name, self.index, self.type)

    def __str__(self):
        try:
            return "%s: %s...%s" % (self.name, self.min, self.max)
        except AttributeError:
            # update() has not seen any value yet.
            return "%s: (no values)" % self.name

    def update(self, rows):
        """Fold the values of this column found in *rows* into min/max.

        *rows* may be any iterable of row sequences; an empty one is a no-op.
        """
        # Materialise once: the values are scanned twice (min and max), which
        # would silently exhaust a lazy map() / iterator after the first scan.
        values = [self.getvalue(row) for row in rows]
        if not values:
            return
        chunkmin = min(values, **self.key)
        chunkmax = max(values, **self.key)
        try:
            curmin = self.min
            curmax = self.max
        except AttributeError:
            # First non-empty chunk: nothing to compare against yet.
            self.min = chunkmin
            self.max = chunkmax
        else:
            self.min = min(curmin, chunkmin, **self.key)
            self.max = max(curmax, chunkmax, **self.key)

def identity(s):
    """Return *s* unchanged (a no-op converter for fields kept as-is)."""
    return s

# Maps the type suffix of a COLUMN[:TYPE] argument to the extra positional
# arguments unpacked into Aggregator: (converter,) or (converter, sort key).
# The empty suffix (no ":TYPE" given) keeps the raw field and compares it
# as-is; "text" also keeps the raw field but orders it with locale.strxfrm.
convert = {
    "float": (float,),
    "int": (int,),
    "text": (identity, locale.strxfrm),
    "": (identity,),
}

def main():
    """Command-line entry point: print min...max for selected CSV columns.

    Usage: csv_minmax_chunked.py CSVFILE COLUMN[:TYPE] [COLUMN[:TYPE] ...]

    The first row of CSVFILE is taken as the header. TYPE must be a key of
    ``convert``; omitting it compares the raw field text. The file is read
    once, in chunks, and every aggregator is fed each chunk.
    """
    import argparse
    import sys

    locale.setlocale(locale.LC_ALL, "")

    parser = argparse.ArgumentParser()
    # NOTE(review): the argument definitions were missing from the pasted
    # script. They are reconstructed from the attributes read below
    # (csvfile, columns, chunksize) and from the sample invocation; the
    # option spelling and default chunk size are assumptions -- confirm.
    parser.add_argument("csvfile")
    parser.add_argument("columns", nargs="+")
    parser.add_argument("--chunksize", "-s", type=int, default=2**12)
    args = parser.parse_args()
    if args.chunksize < 1:
        parser.error("--chunksize must be a positive integer")

    # "name:type" -> (name, type); a bare "name" yields (name, "").
    ntpairs = (column.partition(":")[::2] for column in args.columns)

    # The csv module wants a binary stream on Python 2 but a text stream
    # opened with newline="" on Python 3.
    if sys.version_info[0] < 3:
        instream = open(args.csvfile, "rb")
    else:
        instream = open(args.csvfile, newline="")

    with instream:
        rows = csv.reader(instream)
        # Column name -> position, taken from the header row.
        header = dict((name, index) for index, name in
                      enumerate(next(rows)))
        aggregators = [
            Aggregator(
                "{0}({1})".format(name, typename or "str"),
                header[name],
                *convert[typename])
            for name, typename in ntpairs]
        chunksize = args.chunksize
        while True:
            # Bounded chunks keep memory flat while still letting min()/max()
            # run over whole lists.
            chunk = list(islice(rows, chunksize))
            if not chunk:
                break
            for agg in aggregators:
                agg.update(chunk)
    for agg in aggregators:
        # Single-argument call form prints the same on Python 2 and 3.
        print(agg)

# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
$ head sample.csv
alpha,beta,gamma,delta
69,-454,460960,ELIJAH
92,796,278885,SUFFICIENCY
42,895,934523,PIONEERED
88,-213,360633,reassert
63,-518,327010,mcmahon
84,604,540764,deprecatory
83,117,621832,VEGETARIANISM'S
61,411,844243,tan
29,-147,982043,plushest
$ ./csv_minmax_chunked.py sample.csv alpha:int beta:int beta gamma:float delta delta:text
alpha(int): 0...99
beta(int): -1000...999
beta(str): -1...999
gamma(float): 0.0...999997.0
delta(str): AALIYAH...études
delta(text): a...ZYUGANOV'S

.

