linalg2020/contacts/main.py

189 lines
5.8 KiB
Python
Raw Normal View History

2020-11-20 19:54:18 +01:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# IMPORTS
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import os;
import sys;
import re;
import numpy as np;
from numpy import int32;
import numpy.linalg as npLA;
import pandas as pd;
from typing import Dict;
# Force the working directory to the folder containing this script, so the
# relative file names opened further down resolve independently of the
# directory the script was started from.
SOURCEDIRECTORY = os.path.split(os.path.realpath(__file__))[0]
os.chdir(SOURCEDIRECTORY)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# GLOBAL VARIABLES
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Input file: one contact per line (surname, first name, e-mail[, group]).
INPUT_CONTACTS = 'kontakte.csv'
# Output file: the sorted group/exercise listing.
OUTPUT_GROUPS = 'teilnehmer.csv'
# Output file: all e-mail addresses on a single line.
OUTPUT_EMAILS = 'EMAILS'
# Maps a group number to its exercise label; filled by assign_exercises().
AUFGABEN: Dict[int, str] = {}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# HAUPTVORGANG
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def main():
    """Run the whole pipeline: load contacts, export, group, censor, report."""
    roster = get_contacts()
    # The address list is exported first, while the e-mails are still
    # uncensored; censor_data() masks them further down.
    write_emails(roster)
    roster = censor_data(assign_exercises(assign_groups(roster)))
    write_groups(roster)
    print_groups(roster)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SEKUNDÄRVORGÄNGE
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def get_contacts() -> pd.DataFrame:
    """Read INPUT_CONTACTS and return the contacts as a typed DataFrame.

    Every non-blank line is split on commas into surname, first name, e-mail
    and an optional fourth field holding the group number.  Underscores in the
    two name fields are replaced by spaces.  The ``key`` column is the
    lower-cased "<first name> <surname>" string.

    Returns:
        A frame with the columns key, surname, name, email, group, exercise.
        ``group`` is a nullable ``Int32`` column (missing or non-numeric
        values become <NA>); all other columns use the ``string`` dtype.

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    with open(INPUT_CONTACTS, 'r') as fp:
        raw_lines = fp.readlines()

    records = []
    for raw in raw_lines:
        stripped = raw.strip()
        if stripped == '':
            continue
        fields = [field.strip() for field in stripped.split(r',')]
        # Only the two name fields use '_' as a stand-in for a space.
        surname = re.sub(r'_', r' ', fields[0])
        name = re.sub(r'_', r' ', fields[1])
        records.append({
            'key': (name + ' ' + surname).lower(),
            'surname': surname,
            'name': name,
            'email': fields[2],
            'group': fields[3] if len(fields) >= 4 else None,
            'exercise': 0,
        })

    frame = pd.DataFrame(
        data=records,
        index=None,
        columns=('key', 'surname', 'name', 'email', 'group', 'exercise'),
    )
    # Anything that is not a number (including a missing field) turns into NaN
    # here and into <NA> after the cast to the nullable integer dtype.
    frame.group = pd.to_numeric(frame.group, errors='coerce')
    column_types = {
        'key': 'string',
        'surname': 'string',
        'name': 'string',
        'email': 'string',
        'group': 'Int32',
        'exercise': 'string',
    }
    return frame.astype(column_types)
def assign_groups(contacts: pd.DataFrame) -> pd.DataFrame:
    """Assign group numbers to contacts according to the ``GROUPS`` file.

    Each non-blank line of ``GROUPS`` stands for one group; groups are
    numbered 1, 2, ... in the order of the non-blank lines.  A line is a
    comma-separated list of keys.  The keys are lower-cased and joined into
    one regular-expression alternation (they are NOT escaped), and every
    contact whose ``key`` contains a match is put into that group.  A later
    line overrides an earlier one for contacts that match both.

    Args:
        contacts: frame with at least the columns ``key`` and ``group``;
            it is modified in place.

    Returns:
        The same frame, with ``group`` updated for all matched contacts.
    """
    group_no = 0
    with open('GROUPS', 'r') as fp:
        for raw in fp:
            entry = raw.strip()
            if entry == '':
                continue
            group_no += 1
            keys = [word.strip().lower() for word in entry.split(r',')]
            keys = [key for key in keys if key != '']
            if not keys:
                # A line made up of separators only would compile to the
                # empty pattern, which matches EVERY key and would move all
                # contacts into this group.  The line still consumes its
                # group number, but nobody is assigned to it.
                continue
            pattern = re.compile('|'.join(keys))
            # na=False: a contact without a key never matches.
            members = contacts['key'].str.contains(pattern, na=False)
            contacts.loc[members, 'group'] = group_no
    return contacts
def assign_exercises(contacts: pd.DataFrame) -> pd.DataFrame:
    """Fill the ``exercise`` column from the ``EXERCISES`` file.

    Line number g (counting from 1, blank lines included in the count) of
    ``EXERCISES`` holds the exercise label of group g.  Non-blank lines are
    stored in the module-level AUFGABEN mapping; blank lines create no entry.
    Contacts whose group has no entry receive ``pd.NA``.

    Args:
        contacts: frame with a ``group`` column; it is modified in place.

    Returns:
        The same frame with ``exercise`` replaced.
    """
    with open('EXERCISES', 'r') as fp:
        rows = fp.readlines()
    for number, row in enumerate(rows, start=1):
        label = row.strip()
        if label != '':
            AUFGABEN[number] = label
    contacts.exercise = contacts.group.apply(
        lambda number: AUFGABEN.get(number, pd.NA)
    )
    return contacts
def censor_data(contacts: pd.DataFrame) -> pd.DataFrame:
    """Mask the part before the ``@`` of every e-mail address.

    Everything up to the last ``@`` is replaced by ``____``; the ``@`` and
    what follows it are kept.  Values without an ``@`` are left as they are,
    and missing values (None / NaN / pd.NA) are passed through unchanged.

    Args:
        contacts: frame with an ``email`` column; it is modified in place.

    Returns:
        The same frame with the censored ``email`` column.
    """
    def _mask(address):
        # re.sub() raises TypeError on anything that is not a string, so a
        # missing address has to be let through before the substitution.
        if pd.isna(address):
            return address
        return re.sub(r'^(.*)(@.*)$', r'____\2', address)

    contacts.email = contacts.email.apply(_mask)
    return contacts
def write_emails(contacts: pd.DataFrame):
    """Write all e-mail addresses to OUTPUT_EMAILS on one line.

    The addresses are separated by ", " and the line ends with a trailing
    comma followed by a newline.
    """
    with open(OUTPUT_EMAILS, 'w') as fp:
        joined = ', '.join(contacts.email)
        fp.write(f'{joined},\n')
def write_groups(contacts: pd.DataFrame):
    """Write the group listing to OUTPUT_GROUPS.

    The rows are sorted by group, surname and first name (contacts without a
    group come last).  After a header line, each contact is written as
    "<group>, <exercise>, <SURNAME>, <first name>" with the surname in
    upper case.
    """
    ordered = contacts.sort_values(
        by=['group', 'surname', 'name'],
        ascending=[True, True, True],
        na_position='last',
    )
    ordered = ordered.reset_index(drop=True)
    with open(OUTPUT_GROUPS, 'w') as fp:
        fp.write('GRUPPE, AUFGABE, NACHNAME, VORNAME\n')
        for _, row in ordered.iterrows():
            # The first name is read with ['name'] because the attribute
            # row.name is the row's index label, not the column value.
            first_name = row['name']
            fp.write(
                f'{row.group}, {row.exercise}, {row.surname.upper()}, {first_name}\n'
            )
def print_groups(contacts: pd.DataFrame):
    """Print one Markdown-style bullet line per group with abbreviated names.

    Groups 1..n (n being the largest group number present) are printed in
    order, each as "- **Gruppe g** | <exercise> | AAA.B, CCC.D", followed by
    one final line for the contacts without a group.  A member is shown as
    the first three letters of the first name, a dot, and the first letter of
    the surname, all in upper case.  Within a line the members are sorted by
    first name, then surname.

    Every line is terminated by a newline, including lines for groups that
    have no members.

    Args:
        contacts: frame with the columns ``group``, ``name`` and ``surname``.
    """
    contacts = contacts.sort_values(
        by=['group', 'name', 'surname'],
        ascending=[True, True, True],
        na_position='last',
    )
    contacts = contacts.reset_index(drop=True)

    # Largest group number actually used.  Taking the column maximum works
    # for any integer dtype instead of relying on the scalars being exactly
    # numpy.int32.
    assigned = contacts.group.dropna()
    n_groups = max(0, int(assigned.max())) if len(assigned) > 0 else 0

    # g == 0 stands for "no group" and is printed last.
    for g in list(range(1, n_groups + 1)) + [0]:
        if g == 0:
            members = contacts[pd.isnull(contacts.group)]
            header = '- _keiner Gruppe zugeordnet!_'
        else:
            members = contacts[contacts.group == g]
            header = '- **Gruppe {}** | {} |'.format(g, AUFGABEN.get(g, '???'))
        tags = [
            # [:1] instead of [0] so that an empty surname does not raise.
            ' {name}.{surname}'.format(
                surname=surname[:1].upper(),
                name=name[:3].upper(),
            )
            for name, surname in zip(members['name'], members['surname'])
        ]
        # One print per line: the newline is emitted even when the group has
        # no members, so the next header never ends up on the same line.
        print(header + ','.join(tags))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ENTRY POINT
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()