ads2_2022/code/python/src/algorithms/hirschberg/algorithms.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# IMPORTS
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from src.thirdparty.types import *;
from src.thirdparty.maths import *;

from models.generated.config import *;
from src.models.hirschberg.penalties import *;
from src.algorithms.hirschberg.display import *;
from src.algorithms.hirschberg.matrix import *;
from src.algorithms.hirschberg.paths import *;
from src.models.hirschberg.alignment import *;

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# EXPORTS
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

__all__ = [
    'hirschberg_algorithm',
    'simple_algorithm',
];

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# METHODS simple_algorithm, hirschberg_algorithm
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def simple_algorithm(
    X:    str,
    Y:    str,
    verbose: List[EnumHirschbergVerbosity] = [],
) -> Tuple[str, str]:
    '''
    Computes the edit distances and the optimal directions exactly once
    and derives an optimal alignment directly from them.

    @inputs
    - `X`, `Y` - the two words to be aligned.
    - `verbose` - if not the empty list, the cost matrix and the alignment are printed.

    @returns
    The pair `(word_x, word_y)` produced by `reconstruct_words`.
    '''
    # Both words are handed to the helpers with a leading '-' prepended
    # (presumably the empty-prefix row/column of the DP table - confirm in matrix module).
    padded_x = '-' + X
    padded_y = '-' + Y

    Costs, Moves = compute_cost_matrix(X=padded_x, Y=padded_y)
    path = reconstruct_optimal_path(Moves=Moves)
    moves_on_path = [Moves[coord] for coord in path]
    word_x, word_y = reconstruct_words(X=padded_x, Y=padded_y, moves=moves_on_path, path=path)

    # Nothing to display: hand back the alignment straight away.
    if verbose == []:
        return word_x, word_y

    # Verbose output (irrelevant for the algorithm itself).
    table = display_cost_matrix(Costs=Costs, path=path, X=padded_x, Y=padded_y, verbose=verbose)
    separator = '-' * len(word_x)
    display = word_y + '\n' + separator + '\n' + word_x
    print(f'\n{table}\n\n\x1b[1mOptimales Alignment:\x1b[0m\n\n{display}\n')
    return word_x, word_y

def hirschberg_algorithm(
    X:    str,
    Y:    str,
    verbose: List[EnumHirschbergVerbosity] = [],
    show: List[EnumHirschbergShow] = [],
) -> Tuple[str, str]:
    '''
    Entry point of the Hirschberg algorithm.

    The idea of the method is to work with the edit distances (cost matrix) only,
    which makes it more space-efficient than the simple method.
    A kind of traceback through the underlying DP is obtained by recursion,
    from which an optimal alignment follows immediately;
    divide-and-conquer keeps the time cost small.

    All of the actual work is delegated to `hirschberg_algorithm_step`,
    started at recursion depth 1.

    @inputs
    - `X`, `Y` - the two words to be aligned.
    - `verbose` - if not the empty list, the resulting alignment is printed.
    - `show` - if it contains `EnumHirschbergShow.tree`, the alignment is printed as a tree,
      otherwise as two lines separated by a rule.

    @returns
    The pair `(align.as_string1(), align.as_string2())` of the computed alignment.
    '''
    align = hirschberg_algorithm_step(X=X, Y=Y, depth=1, verbose=verbose, show=show)
    aligned_x = align.as_string1()
    aligned_y = align.as_string2()

    # Quiet mode: no output requested.
    if verbose == []:
        return aligned_x, aligned_y

    # Verbose output (irrelevant for the algorithm itself).
    if EnumHirschbergShow.tree in show:
        display = align.astree(braces=True)
    else:
        braced_x = align.as_string1(braces=True)
        braced_y = align.as_string2(braces=True)
        rule = '-' * len(braced_x)
        display = braced_y + '\n' + rule + '\n' + braced_x
    print(f'\n\x1b[1mOptimales Alignment:\x1b[0m\n\n{display}\n')

    return aligned_x, aligned_y

def hirschberg_algorithm_step(
    X:     str,
    Y:     str,
    depth: int = 0,
    verbose:  List[EnumHirschbergVerbosity] = [],
    show:  List[EnumHirschbergShow] = [],
) -> Alignment:
    '''
    Recursive step of the Hirschberg algorithm.

    Splits the word `Y` into two halves and determines a corresponding split of `X`
    that minimises the edit distance.
    This yields one point of the optimal path through the cost matrix
    as well as a division of the problem into a left and a right half,
    which are then solved recursively.

    @inputs
    - `X`, `Y` - the two (sub)words to be aligned; `Y` is the word that gets halved.
    - `depth` - current recursion depth, only used in the verbose output.
    - `verbose` - if not the empty list, the cost matrices of this step are printed.
    - `show` - if it contains `EnumHirschbergShow.atoms`, the base cases are printed too.

    @returns
    An `AlignmentBasic` in the base case (`Y` has at most one letter),
    otherwise an `AlignmentPair` of the alignments of the two halves.
    '''
    n = len(Y)

    # Base case. An empty Y has to stop here as well: halving it yields two
    # empty words again, so the recursion below would never terminate.
    if n <= 1:
        Costs, Moves = compute_cost_matrix(X = '-' + X, Y = '-' + Y)
        path = reconstruct_optimal_path(Moves=Moves)
        word_x, word_y = reconstruct_words(
            X = '-' + X,
            Y = '-' + Y,
            moves = [Moves[coord] for coord in path],
            path = path,
        )

        # verbose output (irrelevant for the algorithm itself):
        if verbose != [] and (EnumHirschbergShow.atoms in show):
            table = display_cost_matrix(Costs=Costs, path=path, X = '-' + X, Y = '-' + Y, verbose=verbose)
            print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{table}')

        return AlignmentBasic(word1=word_x, word2=word_y)

    # Split position in Y: ceil(n / 2), computed in integer arithmetic.
    n = (n + 1) // 2

    # left half of the horizontal word:
    Y1 = Y[:n]
    X1 = X

    # right half of the horizontal word (horizontal and vertical word both reversed):
    Y2 = Y[n:][::-1]
    X2 = X[::-1]

    # solve the two sub-problems:
    Costs1, Moves1 = compute_cost_matrix(X = '-' + X1, Y = '-' + Y1)
    Costs2, Moves2 = compute_cost_matrix(X = '-' + X2, Y = '-' + Y2)

    # verbose output (irrelevant for the algorithm itself):
    if verbose != []:
        path1, path2 = reconstruct_optimal_path_halves(Costs1=Costs1, Costs2=Costs2, Moves1=Moves1, Moves2=Moves2)
        table = display_cost_matrix_halves(
            Costs1 = Costs1,
            Costs2 = Costs2,
            path1  = path1,
            path2  = path2,
            X1     =  '-' + X1,
            X2     =  '-' + X2,
            Y1     =  '-' + Y1,
            Y2     =  '-' + Y2,
            verbose   = verbose,
        )
        print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{table}')

    # coordinates of the optimal transition between the halves;
    # only the first component of the first coordinate is needed: it is the cut position in X.
    coord1, _ = get_optimal_transition(Costs1=Costs1, Costs2=Costs2)
    p = coord1[0]

    # divide and conquer:
    align_left = hirschberg_algorithm_step(X=X[:p], Y=Y[:n], depth=depth+1, verbose=verbose, show=show)
    align_right = hirschberg_algorithm_step(X=X[p:], Y=Y[n:], depth=depth+1, verbose=verbose, show=show)

    # put the partial results together:
    return AlignmentPair(left=align_left, right=align_right)