Data Munging
4 July 2024
Possible solutions to the Data Munging kata described at CodeKata.
Part #1
""" data_munging_1.py """
# usage: python3 data_munging_1.py < weather.dat
import sys
def find_min_spread_day(lines):
    """Return the day number with the smallest temperature spread.

    Each line is split on whitespace and its first three fields are read
    as day, maximum and minimum. Any '*' characters are removed from the
    two temperature fields before they are converted to int.

    Args:
        lines: Iterable of text lines (for example an open file or a list
            of strings).

    Returns:
        The day (int) whose ``maximum - minimum`` is smallest; the first
        such day wins on ties. Returns 0 when no line could be parsed.
    """
    best_day = 0
    best_spread = None  # None means "no data row seen yet"
    for line in lines:
        fields = line.split()[:3]
        try:
            day = int(fields[0])
            maximum = int(fields[1].replace('*', ''))
            minimum = int(fields[2].replace('*', ''))
        except (IndexError, ValueError):
            # Blank lines, headers, summary rows and truncated rows all
            # fail one of the conversions above; skip them.
            continue
        spread = maximum - minimum
        if best_spread is None or spread < best_spread:
            best_day, best_spread = day, spread
    return best_day


if __name__ == '__main__':
    print(find_min_spread_day(sys.stdin))
Part #2
""" data_munging_2 """
# usage: python3 data_munging_2.py < football.dat
import sys
def find_min_goal_difference_team(lines):
    """Return the team with the smallest |for - against| goal difference.

    Each line is split on whitespace. Field 1 is taken as the team name,
    field 6 as goals for and field 8 as goals against (field 7 is the
    '-' separator between them in the table).

    Args:
        lines: Iterable of text lines (for example an open file or a list
            of strings).

    Returns:
        The name of the team with the smallest absolute difference; the
        first such team wins on ties. Returns '' when no row could be
        parsed.
    """
    best_team = ''
    best_spread = None  # None means "no data row seen yet"
    for line in lines:
        fields = line.split()
        # Skip blank lines, the header row and the dashed separator row.
        if not fields or fields[0] == 'Team' or '-' in fields[0]:
            continue
        try:
            spread = abs(int(fields[6]) - int(fields[8]))
        except (IndexError, ValueError):
            # Row is too short or its goal columns are not integers.
            continue
        if best_spread is None or spread < best_spread:
            best_team, best_spread = fields[1], spread
    return best_team


if __name__ == '__main__':
    print(find_min_goal_difference_team(sys.stdin))
Part #3
""" data_munging_3.py """
# usage: python3 data_munging_3.py key col col < table.dat
# usage: python3 data_munging_3.py Dy MxT MnT < weather.dat
# usage: python3 data_munging_3.py Team F A < football.dat
import sys
def text2int(text):
    """Convert a table field to an int, ignoring '*' characters.

    Args:
        text: Field text such as '97' or '32*'.

    Returns:
        The integer value of ``text`` after every '*' has been removed.

    Raises:
        ValueError: If the remaining text is not a valid integer literal.
    """
    return int(text.replace('*', ''))
def find_min_spread_key(lines, names):
    """Return the key of the row with the smallest spread between two columns.

    The first line is treated as the header. Each of the three ``names``
    is located in it with ``str.find`` (a plain substring search), and the
    resulting character offsets are used to pick, on every following line,
    the first whitespace-delimited token starting at or after each offset.

    Args:
        lines: Iterable of text lines; the first one is the header.
        names: Sequence of exactly three header labels: the key column
            followed by the two numeric columns to compare.

    Returns:
        The key text of the row whose ``abs(col1 - col2)`` is smallest
        (first such row wins on ties), or None if no row could be parsed.

    Raises:
        ValueError: If ``names`` does not hold exactly three labels or a
            label does not occur in the header line.
    """
    rows = iter(lines)
    header = next(rows, '')  # empty input yields an empty header
    cols = tuple(header.find(name) for name in names)
    if len(cols) != 3:
        raise ValueError('expected 3 column names, got {}'.format(len(cols)))
    if -1 in cols:
        # str.find returns -1 for a missing label; used as an offset it
        # would silently select the tail of every line.
        missing = [name for name, col in zip(names, cols) if col == -1]
        raise ValueError(
            'column(s) not found in header: {}'.format(', '.join(missing))
        )

    best_key = None
    best_spread = None  # None means "no data row seen yet"
    for line in rows:
        try:
            key, x1, x2 = (line[i:].split()[0] for i in cols)
            spread = abs(text2int(x1) - text2int(x2))
        except (IndexError, ValueError):
            # Line is too short to reach a column, or a field is not an
            # integer (blank lines, separators, summary rows).
            continue
        if best_spread is None or spread < best_spread:
            best_key, best_spread = key, spread
    return best_key


if __name__ == '__main__':
    try:
        print(find_min_spread_key(sys.stdin, sys.argv[1:4]))
    except ValueError as error:
        sys.exit('data_munging_3.py: {}'.format(error))