linalg2020/contacts/main.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# IMPORTS
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

import os;
import sys;
import re;
import numpy as np;
from numpy import int32;
import numpy.linalg as npLA;
import pandas as pd;
from typing import Dict;

# Force the working directory to the folder that contains this script, so
# that relative file names are resolved next to it.
SOURCEDIRECTORY = os.path.dirname(os.path.realpath(__file__))
os.chdir(SOURCEDIRECTORY)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# GLOBAL VARIABLES
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# File names below are relative to the current working directory.
INPUT_CONTACTS = 'kontakte.csv'   # contact list that is read in
OUTPUT_GROUPS = 'teilnehmer.csv'  # group table that is written out
OUTPUT_EMAILS = 'EMAILS'          # e-mail address list that is written out
# Maps a group number to the text of the exercise assigned to that group.
AUFGABEN: Dict[int, str] = {}

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# HAUPTVORGANG
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def main():
    """Run the whole pipeline: read contacts, export e-mails, build and output groups."""
    contacts = get_contacts()
    # The address list is written before the e-mails get censored further down.
    write_emails(contacts)
    # Each step takes the contact table and hands back the updated table.
    for step in (assign_groups, assign_exercises, censor_data):
        contacts = step(contacts)
    write_groups(contacts)
    print_groups(contacts)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SEKUNDÄRVORGÄNGE
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def get_contacts() -> pd.DataFrame:
    """Read the contact list from ``INPUT_CONTACTS`` into a data frame.

    Every non-blank line is split on commas into
    ``surname, name, email[, group]``. Fields are stripped of surrounding
    whitespace, and underscores in the surname and name are replaced by
    spaces.

    Returns:
        A data frame with the columns ``key`` (lower-cased
        ``"<name> <surname>"``), ``surname``, ``name``, ``email``, ``group``
        (nullable ``Int32``; missing or non-numeric values become ``<NA>``)
        and ``exercise`` (string column holding the placeholder ``'0'``).

    Raises:
        ValueError: If a non-blank line has fewer than three fields.
    """
    records = []
    with open(INPUT_CONTACTS, 'r') as fp:
        for lineno, raw in enumerate(fp, start=1):
            line = raw.strip()
            if line == '':
                continue
            parts = [part.strip() for part in line.split(',')]
            if len(parts) < 3:
                # Fail with the offending line instead of an anonymous IndexError.
                raise ValueError(
                    '{}, line {}: expected at least 3 comma-separated fields, got {}: {!r}'.format(
                        INPUT_CONTACTS, lineno, len(parts), line
                    )
                )
            surname = parts[0].replace('_', ' ')
            name = parts[1].replace('_', ' ')
            records.append(dict(
                key=(name + ' ' + surname).lower(),
                surname=surname,
                name=name,
                email=parts[2],
                group=parts[3] if len(parts) >= 4 else None,
                exercise=0,
            ))
    contacts = pd.DataFrame(
        data=records,
        index=None,
        columns=('key', 'surname', 'name', 'email', 'group', 'exercise')
    )
    # Non-numeric or missing group entries turn into NaN and then into <NA>.
    contacts['group'] = pd.to_numeric(contacts['group'], errors='coerce')
    contacts = contacts.astype(dict(
        key='string',
        surname='string',
        name='string',
        email='string',
        group='Int32',
        exercise='string',
    ))
    return contacts

def assign_groups(contacts: pd.DataFrame) -> pd.DataFrame:
    """Assign group numbers to contacts according to the file ``GROUPS``.

    Every non-blank line of ``GROUPS`` stands for one group; the groups are
    numbered 1, 2, ... in order of appearance. A line is a comma-separated
    list of keys. Each key is stripped and lower-cased, the keys are joined
    into one regular-expression alternation, and every contact whose ``key``
    column contains a match gets that line's group number. The keys are used
    as regular expressions without escaping. Later lines overwrite earlier
    assignments.

    Args:
        contacts: Table with at least the columns ``key`` and ``group``;
            it is modified in place.

    Returns:
        The same ``contacts`` object with the ``group`` column updated.
    """
    group_number = 0
    with open('GROUPS', 'r') as fp:
        for raw in fp:
            line = raw.strip()
            if line == '':
                continue
            group_number += 1
            keys = [word.strip().lower() for word in line.split(',')]
            keys = [key for key in keys if key != '']
            if not keys:
                # A line made only of separators would give an empty pattern,
                # which matches every contact. Skip it, but keep the number
                # it consumed so the numbering of later lines is unchanged.
                continue
            pattern = re.compile('|'.join(keys))
            # na=False: a missing key never matches and cannot break the mask.
            mask = contacts.key.str.contains(pattern, na=False)
            contacts.loc[mask, 'group'] = group_number
    return contacts

def assign_exercises(contacts: pd.DataFrame) -> pd.DataFrame:
    """Load the exercises from the file ``EXERCISES`` and attach them to the contacts.

    The 1-based line number of each non-blank line is used as the group
    number and the stripped line as that group's exercise; blank lines are
    skipped but still count towards the numbering. The mapping is stored in
    the module-level ``AUFGABEN``. Each contact's ``exercise`` is then set to
    the entry for its group, or ``pd.NA`` when there is none.

    Args:
        contacts: Table with the columns ``group`` and ``exercise``;
            it is modified in place.

    Returns:
        The same ``contacts`` object with the ``exercise`` column replaced.
    """
    global AUFGABEN
    with open('EXERCISES', 'r') as fp:
        rows = fp.readlines()
    for number, row in enumerate(rows, start=1):
        text = row.strip()
        if text != '':
            AUFGABEN[number] = text
    contacts.exercise = contacts.group.apply(lambda g: AUFGABEN.get(g, pd.NA))
    return contacts

def censor_data(contacts: pd.DataFrame) -> pd.DataFrame:
    """Replace the part before the last ``@`` of every e-mail address by ``____``.

    Addresses without an ``@`` are left unchanged.

    Args:
        contacts: Table with an ``email`` column; it is modified in place.

    Returns:
        The same ``contacts`` object with the ``email`` column replaced.
    """
    address_pattern = re.compile(r'^(.*)(@.*)$')

    def mask_address(address):
        # The first group is greedy, so the split happens at the last '@'.
        return address_pattern.sub(r'____\2', address)

    contacts.email = contacts.email.apply(mask_address)
    return contacts

def write_emails(contacts: pd.DataFrame):
    """Write all e-mail addresses to ``OUTPUT_EMAILS`` on a single line.

    The addresses are separated by ``', '`` and the line ends with a
    trailing comma followed by a newline.

    Args:
        contacts: Table with an ``email`` column.
    """
    with open(OUTPUT_EMAILS, 'w') as fp:
        fp.write(', '.join(contacts.email) + ',\n')

def write_groups(contacts: pd.DataFrame):
    """Write the group table to ``OUTPUT_GROUPS``.

    The output starts with a header line, followed by one line per contact
    in the form ``group, exercise, SURNAME, name``. Contacts are sorted by
    group, then surname, then name, with missing groups last.

    Args:
        contacts: Table with the columns ``group``, ``exercise``,
            ``surname``, ``name`` and ``email``. The caller's table is not
            reordered; sorting happens on a copy.
    """
    ordered = contacts.sort_values(
        by=['group', 'surname', 'name'],
        ascending=[True, True, True],
        na_position='last',
    ).reset_index(drop=True)
    template = '{group}, {ex}, {surname}, {name}\n'
    with open(OUTPUT_GROUPS, 'w') as fp:
        fp.write('GRUPPE, AUFGABE, NACHNAME, VORNAME\n')
        for _, row in ordered.iterrows():
            fields = dict(
                surname=row['surname'].upper(),
                # Item access is required: ``row.name`` would be the row
                # label of the Series, not the value of the 'name' column.
                name=row['name'],
                # Passed along but not used by the template.
                email=row['email'],
                group=row['group'],
                ex=row['exercise'],
            )
            fp.write(template.format(**fields))

def print_groups(contacts: pd.DataFrame):
    """Print one Markdown bullet line per group with abbreviated member names.

    Groups ``1 .. n`` are printed in order, where ``n`` is the largest group
    number present, followed by one line for the contacts without a group.
    Each member is shown as the first three letters of the name and the
    first letter of the surname, both upper-cased. Within a group, members
    are ordered by name, then surname. Every line is terminated by a
    newline, including lines for groups that have no members.

    Args:
        contacts: Table with the columns ``group``, ``name`` and
            ``surname``. The caller's table is not reordered.
    """
    contacts = contacts.sort_values(
        by=['group', 'name', 'surname'],
        ascending=[True, True, True],
        na_position='last',
    ).reset_index(drop=True)
    # Take the highest group number from the column itself, so the result
    # does not depend on the exact integer dtype of the stored values.
    assigned = contacts.group.dropna()
    n_groups = max(0, int(assigned.max())) if len(assigned) > 0 else 0
    # 0 is used as the marker for "no group" and is handled last.
    for g in list(range(1, n_groups + 1)) + [0]:
        if g == 0:
            members = contacts[pd.isnull(contacts.group)]
            header = '- _keiner Gruppe zugeordnet!_'
        else:
            members = contacts[contacts.group == g]
            header = '- **Gruppe {}** | {} |'.format(g, AUFGABEN.get(g, '???'))
        labels = [
            ' {name}.{surname}'.format(
                # Slicing instead of indexing tolerates an empty surname.
                surname=entry['surname'][:1].upper(),
                name=entry['name'][:3].upper(),
            )
            for _, entry in members.iterrows()
        ]
        # Build the whole line first so that it always ends with a newline,
        # even when the group has no members.
        print(header + ','.join(labels))

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# EXECUTION
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()