Data Munging

Possible solutions to the Data Munging kata described at CodeKata.

Part #1

""" data_munging_1.py """

# usage: python3 data_munging_1.py < weather.dat

import sys


def day_with_min_spread(lines):
    """ return the day whose maximum - minimum spread is smallest

    lines -- iterable of text lines; the first three whitespace-separated
             fields of a data row are read as day, maximum and minimum
             (any '*' inside the two temperature fields is ignored)

    Rows whose first three fields are missing or are not integers
    (header, blank lines, summary row, truncated rows) are skipped.
    The earliest row wins on ties; 0 is returned when no data row
    was found.
    """
    occurred_on_day = 0
    minimum_spread = None  # None = nothing seen yet, so no artificial cap

    for line in lines:
        values = line.split()[:3]
        try:
            # all three conversions are guarded so that one malformed row
            # is skipped instead of aborting the whole run
            day = int(values[0])
            maximum = int(values[1].replace('*', ''))
            minimum = int(values[2].replace('*', ''))
        except (IndexError, ValueError):
            continue
        spread = maximum - minimum
        if minimum_spread is None or spread < minimum_spread:
            occurred_on_day = day
            minimum_spread = spread

    return occurred_on_day


if __name__ == '__main__':
    print(day_with_min_spread(sys.stdin))

Part #2

""" data_munging_2 """

# usage: python3 data_munging_2.py < football.dat

import sys


def team_with_min_spread(lines):
    """ return the team with the smallest absolute F - A difference

    lines -- iterable of text lines; in a data row the whitespace-separated
             field 1 is the team name, field 6 the goals for and field 8
             the goals against (field 7 is the '-' between them)

    The header row (first field 'Team'), rows whose first field contains
    a '-', blank lines and rows with missing or non-integer goal fields
    are skipped.  The earliest row wins on ties; '' is returned when no
    data row was found.
    """
    minimum_spread = None  # None = nothing seen yet, so no artificial cap
    occurred_for_team = ''

    for line in lines:
        values = line.split()
        # a blank line yields no fields at all, so test for that first
        if not values or values[0] == 'Team' or '-' in values[0]:
            continue
        try:
            team = values[1]
            spread = abs(int(values[6]) - int(values[8]))
        except (IndexError, ValueError):
            # short or non-numeric row: skip it rather than abort the run
            continue
        if minimum_spread is None or spread < minimum_spread:
            occurred_for_team = team
            minimum_spread = spread

    return occurred_for_team


if __name__ == '__main__':
    print(team_with_min_spread(sys.stdin))

Part #3

""" data_munging_3.py """

# usage: python3 data_munging_3.py key col col < table.dat

# usage: python3 data_munging_3.py Dy MxT MnT < weather.dat
# usage: python3 data_munging_3.py Team F A < football.dat

import sys


def text2int(text):
    """ convert text to an int, ignoring every '*' it contains """
    without_stars = ''.join(text.split('*'))
    return int(without_stars)


def _column_offsets(header, names):
    """ return the start offset in header of each column listed in names

    Column names are matched as whole whitespace-separated words, so a
    name that is only part of another column title does not match.
    Raises KeyError for a name that is not a column of header.
    """
    offsets = {}
    position = 0
    for token in header.split():
        # search from the end of the previous token so that repeated or
        # overlapping titles resolve to their own position
        position = header.index(token, position)
        offsets.setdefault(token, position)  # first occurrence wins
        position += len(token)
    return tuple(offsets[name] for name in names)


def key_with_min_spread(lines, names):
    """ return the key of the row with the smallest absolute spread

    lines -- iterable of text lines; the first line is the header
    names -- three column titles: the key column and the two value columns

    Each field is taken as the first word found at or after the offset of
    its title in the header.  Rows where a field is missing or a value is
    not an integer (after stripping '*') are skipped.  The earliest row
    wins on ties; None is returned when no data row was found.

    Raises KeyError when a title is not in the header and ValueError when
    names does not hold exactly three titles.
    """
    lines = iter(lines)
    header = next(lines, '')  # empty input: treated as an empty header
    cols = _column_offsets(header, names)
    if len(cols) != 3:
        raise ValueError('expected exactly three column names')

    minimum_occurred_with_key = None
    minimum_spread = None  # None = nothing seen yet, so no artificial cap

    for line in lines:
        try:
            key, x1, x2 = (line[i:].split()[0] for i in cols)
            spread = abs(text2int(x1) - text2int(x2))
        except (IndexError, ValueError):
            continue

        if minimum_spread is None or spread < minimum_spread:
            minimum_occurred_with_key = key
            minimum_spread = spread

    return minimum_occurred_with_key


if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit('usage: python3 data_munging_3.py key col col < table.dat')
    try:
        print(key_with_min_spread(sys.stdin, sys.argv[1:4]))
    except KeyError as missing:
        # fail loudly instead of silently reporting None for a mistyped title
        sys.exit('column not found in header: {}'.format(missing))