linalg2020/contacts/main.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# IMPORTS
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

import os;
import sys;
import re;
import numpy as np;
from numpy import int32;
import numpy.linalg as npLA;
import pandas as pd;
from typing import Dict;

# Force the working directory to the folder that contains this script, so the
# relative file names used further down do not depend on the caller's cwd.
SOURCEDIRECTORY = os.path.split(os.path.realpath(__file__))[0]
os.chdir(SOURCEDIRECTORY)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# GLOBAL VARIABLES
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# File names are relative; they are opened after the chdir performed above.
INPUT_CONTACTS = 'kontakte.csv'    # contact list read by get_contacts
OUTPUT_GROUPS = 'teilnehmer.csv'   # table written by write_groups
OUTPUT_EMAILS = 'EMAILS'           # address list written by write_emails
# Exercise text per group number; filled at runtime by assign_exercises.
AUFGABEN: Dict[int, str] = {}

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# HAUPTVORGANG
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def main():
    """Run the whole pipeline: load contacts, export e-mails, group, report."""
    contacts = get_contacts()
    # The address list is exported first, while the e-mails are still
    # uncensored; censor_data masks them further down the pipeline.
    write_emails(contacts)
    for step in (assign_groups, assign_exercises, censor_data):
        contacts = step(contacts)
    write_groups(contacts)
    print_groups(contacts)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SEKUNDÄRVORGÄNGE
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def get_contacts() -> pd.DataFrame:
    """Read the contact list from ``INPUT_CONTACTS`` into a DataFrame.

    Every non-blank line of the file is split on commas into
    ``surname, name, email[, group]``. Surrounding whitespace is stripped from
    each field and underscores in the surname and name are replaced by spaces.

    Returns:
        A DataFrame with the columns ``key`` (lower-cased ``"name surname"``),
        ``surname``, ``name``, ``email`` and ``exercise`` (all cast to the
        pandas ``string`` dtype; ``exercise`` starts from the placeholder 0)
        and ``group`` (nullable ``Int32``; a missing or non-numeric group
        becomes ``<NA>``).

    Raises:
        ValueError: If a non-blank line has fewer than three fields.
    """
    records = []
    with open(INPUT_CONTACTS, 'r') as fp:
        for lineno, raw in enumerate(fp, start=1):
            line = raw.strip()
            if line == '':
                continue
            fields = [field.strip() for field in line.split(',')]
            if len(fields) < 3:
                # Report the offending line instead of failing with a bare
                # IndexError on the field accesses below.
                raise ValueError(
                    '{}:{}: expected "surname, name, email[, group]", got {!r}'.format(
                        INPUT_CONTACTS, lineno, line
                    )
                )
            surname = fields[0].replace('_', ' ')
            name = fields[1].replace('_', ' ')
            records.append(dict(
                key=(name + ' ' + surname).lower(),
                surname=surname,
                name=name,
                email=fields[2],
                group=fields[3] if len(fields) >= 4 else None,
                exercise=0,
            ))
    contacts = pd.DataFrame(
        data=records,
        index=None,
        columns=('key', 'surname', 'name', 'email', 'group', 'exercise'),
    )
    # Anything that is not a number (including a missing fourth field) turns
    # into NaN here and into <NA> after the cast to the nullable Int32 dtype.
    contacts.group = pd.to_numeric(contacts.group, errors='coerce')
    contacts = contacts.astype(dict(
        key='string',
        surname='string',
        name='string',
        email='string',
        group='Int32',
        exercise='string',
    ))
    return contacts

def assign_groups(contacts: pd.DataFrame) -> pd.DataFrame:
    """Assign group numbers to contacts according to the ``GROUPS`` file.

    Every non-blank line of ``GROUPS`` stands for one group; groups are
    numbered 1, 2, ... in order of appearance (blank lines are not counted).
    A line is a comma-separated list of search terms. The terms are
    lower-cased and joined with ``|`` into one regular expression, so they are
    interpreted as regex fragments. Every contact whose ``key`` contains a
    match is put into that group; a later line overrides an earlier one.

    Args:
        contacts: Frame with a ``key`` and a ``group`` column; it is modified
            in place.

    Returns:
        The same ``contacts`` object with ``group`` updated.
    """
    group_number = 0
    with open('GROUPS', 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '':
            continue
        group_number += 1
        terms = [term.strip().lower() for term in line.split(',')]
        terms = [term for term in terms if term != '']
        if not terms:
            # A line made only of separators still occupies its group number,
            # but an empty alternation compiles to '' and would match (and
            # therefore reassign) every single contact.
            continue
        pattern = re.compile('|'.join(terms))
        contacts.loc[contacts.key.str.contains(pattern), 'group'] = group_number
    return contacts

def assign_exercises(contacts: pd.DataFrame) -> pd.DataFrame:
    """Attach to every contact the exercise that belongs to its group.

    The file ``EXERCISES`` is read line by line; the stripped text of line
    number ``g`` (1-based, blank lines are counted but produce no entry) is
    stored as ``AUFGABEN[g]``. The ``exercise`` column is then rebuilt from the
    ``group`` column, using ``pd.NA`` for groups without an entry.

    Args:
        contacts: Frame with a ``group`` column; it is modified in place.

    Returns:
        The same ``contacts`` object with ``exercise`` replaced.
    """
    global AUFGABEN
    with open('EXERCISES', 'r') as fp:
        raw_lines = fp.readlines()
    for number, raw in enumerate(raw_lines, start=1):
        title = raw.strip()
        if title != '':
            AUFGABEN[number] = title
    contacts.exercise = contacts.group.apply(lambda g: AUFGABEN.get(g, pd.NA))
    return contacts

def censor_data(contacts: pd.DataFrame) -> pd.DataFrame:
    """Mask the local part of every e-mail address with ``____``.

    Everything before the last ``@`` is replaced; values without an ``@`` are
    left untouched. The ``email`` column of ``contacts`` is overwritten and the
    same frame is returned.
    """
    def _mask(address):
        return re.sub(r'^(.*)(@.*)$', r'____\2', address)

    contacts.email = contacts.email.apply(_mask)
    return contacts

def write_emails(contacts: pd.DataFrame):
    """Write all e-mail addresses to ``OUTPUT_EMAILS`` as a single line.

    The addresses are separated by ``', '`` and the line ends with a trailing
    comma followed by a newline.
    """
    with open(OUTPUT_EMAILS, 'w') as fp:
        addresses = ', '.join(contacts.email)
        fp.write(addresses + ',\n')

def write_groups(contacts: pd.DataFrame):
    """Write the participant table to ``OUTPUT_GROUPS``.

    Rows are sorted by group, surname and name (contacts without a group come
    last). After a header line, each row is written as
    ``group, exercise, SURNAME, name`` with the surname in upper case.
    """
    ordered = contacts.sort_values(
        by=['group', 'surname', 'name'],
        ascending=[True, True, True],
        na_position='last',
    )
    ordered = ordered.reset_index(drop=True)
    row_template = '{group}, {ex}, {surname}, {name}\n'
    with open(OUTPUT_GROUPS, 'w') as fp:
        fp.write('GRUPPE, AUFGABE, NACHNAME, VORNAME\n')
        for _, row in ordered.iterrows():
            fp.write(row_template.format(
                group=row.group,
                ex=row.exercise,
                surname=row.surname.upper(),
                # Subscript access is required here: the attribute `row.name`
                # is the row's index label, not the 'name' column.
                name=row['name'],
            ))

def print_groups(contacts: pd.DataFrame):
    """Print one Markdown bullet per group with abbreviated member names.

    Groups 1..N are printed in ascending order, where N is the largest group
    number present in ``contacts.group``; the contacts without a group follow
    as a final bullet. Each member is shown as the first three letters of the
    name and the first letter of the surname, both upper-cased. The exercise
    text of a group is taken from ``AUFGABEN`` (``'???'`` if there is none).

    Every bullet is terminated by a newline, including bullets for groups
    that have no members.

    Args:
        contacts: Frame with ``group``, ``name`` and ``surname`` columns; it is
            not modified.
    """
    ordered = contacts.sort_values(
        by=['group', 'name', 'surname'],
        ascending=[True, True, True],
        na_position='last',
    ).reset_index(drop=True)
    # Derive the group count from the non-missing values themselves instead of
    # testing for one specific scalar type, so any integer dtype works.
    assigned = ordered.group.dropna()
    n_groups = max(0, int(assigned.max())) if len(assigned) > 0 else 0
    for g in list(range(1, n_groups + 1)) + [0]:
        if g == 0:
            members = ordered[pd.isnull(ordered.group)]
            header = '- _keiner Gruppe zugeordnet!_'
        else:
            members = ordered[ordered.group == g]
            ex = AUFGABEN[g] if g in AUFGABEN else '???'
            header = '- **Gruppe {}** | {} |'.format(g, ex)
        labels = [
            ' {name}.{surname}'.format(
                # Slicing (not indexing) keeps an empty surname from raising.
                surname=entry['surname'][:1].upper(),
                name=entry['name'][:3].upper(),
            )
            for _, entry in members.iterrows()
        ]
        # print() ends the line even when `labels` is empty, so the bullet of
        # an empty group no longer runs into the next one.
        print(header + ','.join(labels))

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# AUSFÜHRUNG
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
master > master: Repo 2020-11-20 19:54:18 +01:00			`#!/usr/bin/env python3`
			`# -*- coding: utf-8 -*-`

			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
			`# IMPORTS`
			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`

			`import os;`
			`import sys;`
			`import re;`
			`import numpy as np;`
			`from numpy import int32;`
			`import numpy.linalg as npLA;`
			`import pandas as pd;`
			`from typing import Dict;`

			`# erzwing wd:`
			`SOURCEDIRECTORY = os.path.dirname(os.path.realpath(__file__));`
			`os.chdir(SOURCEDIRECTORY);`

			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
			`# GLOBAL VARIABLES`
			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`

			`INPUT_CONTACTS = 'kontakte.csv';`
			`OUTPUT_GROUPS = 'teilnehmer.csv';`
			`OUTPUT_EMAILS = 'EMAILS';`
			`AUFGABEN: Dict[int, str] = dict();`

			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
			`# HAUPTVORGANG`
			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`

			`def main():`
			`contacts = get_contacts();`
			`write_emails(contacts);`
			`contacts = assign_groups(contacts);`
			`contacts = assign_exercises(contacts);`
			`contacts = censor_data(contacts);`
			`write_groups(contacts);`
			`print_groups(contacts);`
			`return;`

			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
			`# SEKUNDÄRVORGÄNGE`
			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`

			`def get_contacts() -> pd.DataFrame:`
			`contacts = {};`
			`lines = [];`
			`with open(INPUT_CONTACTS, 'r') as fp:`
			`lines = fp.readlines();`
			`data = [];`
			`for line in lines:`
			`line = line.strip();`
			`if line == '':`
			`continue;`
			`parts = line.split(r',');`
			`for i, part in enumerate(parts):`
			`parts[i] = part.strip();`
			`for i in [0, 1]:`
			`parts[i] = re.sub(r'_', r' ', parts[i]);`
			`data.append(dict(`
			`key=(parts[1] + ' ' + parts[0]).lower(),`
			`surname=parts[0],`
			`name=parts[1],`
			`email=parts[2],`
			`group=parts[3] if len(parts) >= 4 else None,`
			`exercise=0`
			`));`
			`contacts = pd.DataFrame(`
			`data=data,`
			`index=None,`
			`columns=('key', 'surname', 'name', 'email', 'group', 'exercise')`
			`);`
			`contacts.group = pd.to_numeric(contacts.group, errors='coerce');`
			`contacts = contacts.astype(dict(`
			`key='string',`
			`surname='string',`
			`name='string',`
			`email='string',`
			`group='Int32',`
			`exercise='string',`
			`));`
			`del lines;`
			`del data;`
			`return contacts;`

			`def assign_groups(contacts: pd.DataFrame) -> pd.DataFrame:`
			`index = 0;`
			`lines = [];`
			`with open('GROUPS', 'r') as fp:`
			`lines = fp.readlines();`
			`for line in lines:`
			`line = line.strip();`
			`if line == '':`
			`continue;`
			`index += 1;`
			`pattern = [];`
			`for word in line.split(r','):`
			`key = word.strip().lower();`
			`if key == '':`
			`continue;`
			`pattern.append(key);`
			`pattern = re.compile('\|'.join(pattern));`
			`contacts.loc[contacts.key.str.contains(pattern), 'group'] = index;`
			`return contacts;`

			`def assign_exercises(contacts: pd.DataFrame) -> pd.DataFrame:`
			`global AUFGABEN;`
			`lines = [];`
			`with open('EXERCISES', 'r') as fp:`
			`lines = fp.readlines();`
			`for _, line in enumerate(lines):`
			`g = _ + 1;`
			`line = line.strip();`
			`if line == '':`
			`continue;`
			`ex = line;`
			`AUFGABEN[g] = ex;`
			`contacts.exercise = contacts.group.apply(lambda g: AUFGABEN[g] if g in AUFGABEN else pd.NA);`
			`return contacts;`

			`def censor_data(contacts: pd.DataFrame) -> pd.DataFrame:`
			`contacts.email = contacts.email.apply(lambda i: re.sub(r'^(.*)(@.*)$', r'____\2', i));`
			`return contacts;`

			`def write_emails(contacts: pd.DataFrame):`
			`with open(OUTPUT_EMAILS, 'w') as fp:`
			`line = ', '.join(list(contacts.email));`
			`fp.write(line + ',\n');`
			`return;`

			`def write_groups(contacts: pd.DataFrame):`
			`contacts = contacts \`
			`.sort_values(`
			`by=['group', 'surname', 'name'],`
			`ascending=[True, True, True],`
			`na_position='last'`
			`) \`
			`.reset_index(drop=True);`
			`with open(OUTPUT_GROUPS, 'w') as fp:`
			`fp.write('GRUPPE, AUFGABE, NACHNAME, VORNAME\n');`
			`for _, entry in contacts.iterrows():`
			`fp.write('{group}, {ex}, {surname}, {name}\n'.format(`
			`surname=entry.surname.upper(),`
			`name=entry['name'], # nötig, weil 'name' auch der Index ist`
			`email=entry.email,`
			`group=entry.group,`
			`# group='' if pd.isnull(contacts.group[i]) else contacts.group[i],`
			`ex=entry.exercise`
			`));`
			`return;`

			`def print_groups(contacts: pd.DataFrame):`
			`contacts = contacts \`
			`.sort_values(`
			`by=['group', 'name', 'surname'],`
			`ascending=[True, True, True],`
			`na_position='last'`
			`) \`
			`.reset_index(drop=True);`
			`n_groups = max([0] + [_ for _ in contacts.group if isinstance(_, int32)]);`
			`for g in list(range(1, n_groups + 1)) + [0]:`
			`if g == 0:`
			`group = contacts[pd.isnull(contacts.group)].reset_index(drop=True);`
			`print('- _keiner Gruppe zugeordnet!_', end='');`
			`else:`
			`group = contacts[contacts.group == g].reset_index(drop=True);`
			`ex = AUFGABEN[g] if g in AUFGABEN else '???';`
			`print('- **Gruppe {}** \| {} \|'.format(g, ex), end='');`
			`Ng = len(group);`
			`for _, entry in group.iterrows():`
			`endchar = ',';`
			`if _ == Ng - 1:`
			`endchar = '\n';`
			`print(' {name}.{surname}'.format(`
			`surname=entry['surname'][0].upper(),`
			`name=entry['name'][:3].upper(),`
			`), end=endchar);`
			`return;`

			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
master > master: minor 2020-11-20 21:49:28 +01:00			`# AUSFÜHRUNG`
master > master: Repo 2020-11-20 19:54:18 +01:00			`# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`

			`if __name__ == '__main__':`
			`main();`