2020-11-20 19:54:18 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
# IMPORTS
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
import os;
|
|
|
|
import sys;
|
|
|
|
import re;
|
|
|
|
import numpy as np;
|
|
|
|
from numpy import int32;
|
|
|
|
import numpy.linalg as npLA;
|
|
|
|
import pandas as pd;
|
|
|
|
from typing import Dict;
|
|
|
|
|
|
|
|
# Force the working directory to the folder that contains this script, so
# that relative file names are resolved next to the script itself.
SOURCEDIRECTORY = os.path.dirname(os.path.realpath(__file__))
os.chdir(SOURCEDIRECTORY)
|
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
# GLOBAL VARIABLES
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
# File the contact list is read from (see get_contacts).
INPUT_CONTACTS = 'kontakte.csv'
# File the group overview is written to (see write_groups).
OUTPUT_GROUPS = 'teilnehmer.csv'
# File the e-mail address list is written to (see write_emails).
OUTPUT_EMAILS = 'EMAILS'
# Group number -> exercise text; filled by assign_exercises.
AUFGABEN: Dict[int, str] = {}
|
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
# MAIN PROCEDURE
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
def main():
    """Run the complete pipeline from contact list to group overview.

    The e-mail list is exported first, from the unmodified contact data.
    Groups and exercises are assigned afterwards, the addresses are masked
    by ``censor_data``, and the result is written to file and printed.
    """
    roster = get_contacts()
    # Must happen before censor_data masks the addresses.
    write_emails(roster)
    for step in (assign_groups, assign_exercises, censor_data):
        roster = step(roster)
    write_groups(roster)
    print_groups(roster)
|
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
# SECONDARY PROCEDURES
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
def get_contacts() -> pd.DataFrame:
    """Load the contact list from ``INPUT_CONTACTS`` into a typed DataFrame.

    Every non-blank line is split on commas into
    ``surname, name, email[, group]``.  All fields are stripped, and
    underscores in the surname and name fields are turned into spaces.
    The ``key`` column is the lower-cased ``"<name> <surname>"`` string.

    Returns:
        A frame with the columns ``key``, ``surname``, ``name``, ``email``,
        ``group`` and ``exercise``.  ``group`` is converted with
        ``pd.to_numeric(errors='coerce')`` and cast to nullable ``Int32``;
        all other columns are cast to ``string``.

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    with open(INPUT_CONTACTS, 'r') as fp:
        raw_lines = fp.readlines()

    records = []
    for raw in raw_lines:
        stripped = raw.strip()
        if not stripped:
            continue
        fields = [field.strip() for field in stripped.split(',')]
        # Underscores stand in for spaces inside the two name fields.
        surname = fields[0].replace('_', ' ')
        name = fields[1].replace('_', ' ')
        records.append({
            'key': (name + ' ' + surname).lower(),
            'surname': surname,
            'name': name,
            'email': fields[2],
            # The group field is optional.
            'group': fields[3] if len(fields) >= 4 else None,
            'exercise': 0,
        })

    frame = pd.DataFrame(
        data=records,
        index=None,
        columns=['key', 'surname', 'name', 'email', 'group', 'exercise'],
    )
    # Non-numeric group entries become missing values instead of raising.
    frame['group'] = pd.to_numeric(frame['group'], errors='coerce')
    return frame.astype({
        'key': 'string',
        'surname': 'string',
        'name': 'string',
        'email': 'string',
        'group': 'Int32',
        'exercise': 'string',
    })
|
|
|
|
|
|
|
|
def assign_groups(contacts: pd.DataFrame) -> pd.DataFrame:
    """Assign group numbers to contacts according to the ``GROUPS`` file.

    Every non-blank line of ``GROUPS`` defines one group; groups are
    numbered 1, 2, ... in order of appearance (blank lines are not
    counted).  A line is a comma-separated list of search terms.  Each term
    is stripped and lower-cased, and the terms are joined into one regular
    expression alternation.  Every contact whose ``key`` contains a match
    gets the line's number written to its ``group`` column; later lines
    overwrite earlier assignments.

    A non-blank line without any usable term (for example a lone comma)
    still consumes its group number but assigns nobody.  Without this guard
    the alternation would be the empty pattern, which matches every key and
    would move all contacts into that group.

    Args:
        contacts: Frame with at least the columns ``key`` and ``group``.
            It is modified in place.

    Returns:
        The same frame, with ``group`` updated.
    """
    with open('GROUPS', 'r') as fp:
        lines = fp.readlines()

    index = 0
    for line in lines:
        line = line.strip()
        if not line:
            continue
        index += 1
        terms = [
            term
            for term in (word.strip().lower() for word in line.split(','))
            if term
        ]
        if not terms:
            # Nothing to search for; an empty regex would match everybody.
            continue
        # na=False: a contact without a key never matches.
        members = contacts.key.str.contains('|'.join(terms), regex=True, na=False)
        contacts.loc[members, 'group'] = index
    return contacts
|
|
|
|
|
|
|
|
def assign_exercises(contacts: pd.DataFrame) -> pd.DataFrame:
    """Look up each contact's exercise from the ``EXERCISES`` file.

    Line *n* of ``EXERCISES`` (1-based, blank lines included in the count)
    holds the exercise for group *n*.  Blank lines add no entry.  The
    mapping is stored in the module-level ``AUFGABEN`` dictionary, and the
    ``exercise`` column is replaced by the entry for each contact's group,
    or ``pd.NA`` when the group has no entry.

    Args:
        contacts: Frame with a ``group`` column; modified in place.

    Returns:
        The same frame, with ``exercise`` replaced.
    """
    with open('EXERCISES', 'r') as fp:
        raw_lines = fp.readlines()

    for number, raw in enumerate(raw_lines, start=1):
        title = raw.strip()
        if title:
            AUFGABEN[number] = title

    def lookup(group_number):
        return AUFGABEN.get(group_number, pd.NA)

    contacts.exercise = contacts.group.apply(lookup)
    return contacts
|
|
|
|
|
|
|
|
def censor_data(contacts: pd.DataFrame) -> pd.DataFrame:
    """Mask the part before the ``@`` of every e-mail address.

    Everything up to the last ``@`` is replaced by ``____``; values that
    contain no ``@`` are left unchanged.

    Args:
        contacts: Frame with an ``email`` column; modified in place.

    Returns:
        The same frame, with ``email`` replaced.
    """
    address = re.compile(r'^(.*)(@.*)$')

    def mask(value):
        return address.sub(r'____\2', value)

    contacts.email = contacts.email.apply(mask)
    return contacts
|
|
|
|
|
|
|
|
def write_emails(contacts: pd.DataFrame):
    """Write all e-mail addresses to ``OUTPUT_EMAILS`` as a single line.

    The addresses are joined with ``', '``; the line ends with a trailing
    comma followed by a newline.

    Args:
        contacts: Frame with an ``email`` column.
    """
    with open(OUTPUT_EMAILS, 'w') as fp:
        addresses = list(contacts.email)
        fp.write('{},\n'.format(', '.join(addresses)))
|
|
|
|
|
|
|
|
def write_groups(contacts: pd.DataFrame):
    """Write the group overview to ``OUTPUT_GROUPS``.

    The rows are sorted by group, then surname, then name, with missing
    values last.  After a header line, one line per contact is written in
    the form ``<group>, <exercise>, <SURNAME>, <name>``; the surname is
    upper-cased.

    Args:
        contacts: Frame with the columns ``group``, ``exercise``,
            ``surname`` and ``name``.  The caller's frame is not modified.
    """
    ordered = contacts.sort_values(
        by=['group', 'surname', 'name'],
        ascending=[True, True, True],
        na_position='last',
    ).reset_index(drop=True)

    with open(OUTPUT_GROUPS, 'w') as fp:
        fp.write('GRUPPE, AUFGABE, NACHNAME, VORNAME\n')
        # Bracket access throughout: ``row.name`` would be the row's index
        # label, not the value of the 'name' column.
        fp.writelines(
            f'{row["group"]}, {row["exercise"]}, '
            f'{row["surname"].upper()}, {row["name"]}\n'
            for _, row in ordered.iterrows()
        )
|
|
|
|
|
|
|
|
def print_groups(contacts: pd.DataFrame):
    """Print a Markdown-style bullet list of all groups to stdout.

    One line is printed for every group number from 1 up to the highest
    number found in the ``group`` column, in the form
    ``- **Gruppe <n>** | <exercise> | <members>``.  The exercise comes from
    the module-level ``AUFGABEN`` mapping (``???`` when there is no entry).
    A final line lists the contacts that have no group.  Members are sorted
    by name, then surname, and abbreviated to the first three letters of
    the name, a dot, and the first letter of the surname, all upper-cased.

    Every line ends with a newline, including lines for groups that have no
    members.

    Args:
        contacts: Frame with the columns ``group``, ``name`` and
            ``surname``.  The caller's frame is not modified.
    """
    ordered = contacts.sort_values(
        by=['group', 'name', 'surname'],
        ascending=[True, True, True],
        na_position='last',
    ).reset_index(drop=True)

    # Highest group number in use.  Taken from the column values rather than
    # from a check on their scalar type, so it does not depend on the column
    # being stored as exactly Int32.
    assigned = ordered.group.dropna()
    n_groups = max(0, int(assigned.max())) if len(assigned) else 0

    # Numbered groups first; 0 stands for "no group" and comes last.
    for g in list(range(1, n_groups + 1)) + [0]:
        if g == 0:
            members = ordered[pd.isnull(ordered.group)]
            header = '- _keiner Gruppe zugeordnet!_'
        else:
            members = ordered[ordered.group == g]
            ex = AUFGABEN[g] if g in AUFGABEN else '???'
            header = '- **Gruppe {}** | {} |'.format(g, ex)

        labels = [
            ' {name}.{surname}'.format(
                # Slicing instead of [0]: an empty surname yields '' rather
                # than raising IndexError.
                surname=entry['surname'][:1].upper(),
                name=entry['name'][:3].upper(),
            )
            for _, entry in members.iterrows()
        ]
        # print() always terminates the line, so a group without members
        # no longer runs into the next group's header.
        print(header + ','.join(labels))
|
|
|
|
|
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
2020-11-20 21:49:28 +01:00
|
|
|
# EXECUTION
|
2020-11-20 19:54:18 +01:00
|
|
|
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|