master > master: code py - hirschberg darstellungen verbessert

2022-06-09 08:48:09 +02:00 · 2022-06-09 08:48:09 +02:00 · 9c5b88b64d
commit 9c5b88b64d
parent 14a882e9d3
2 changed files with 144 additions and 103 deletions
--- a/code/python/src/main.py
+++ b/code/python/src/main.py
@ -41,9 +41,18 @@ def enter():
    #     verbose=True,
    # );
    ## Beispiel für Seminarwoche 10 (Blatt 9):
-    hirschberg_algorithm_full(
-        X = 'ACGAAG',
-        Y = 'AGAT',
+    # hirschberg_algorithm_once(
+    hirschberg_algorithm(
+        # Y = 'ANSPANNEN',
+        # X = 'ANSTRENGEN',
+        # Y = 'AGAT',
+        # X = 'ACGAAG',
+        # Y = 'apple',
+        X = 'happily',
+        Y = 'apple',
+        # X = 'happily',
+        # Y = 'nei wolle elli wien',
+        # X = 'nie will elli wein',
        verbose = True,
    );
    return;
--- a/code/python/src/string_alignment/hirschberg.py
+++ b/code/python/src/string_alignment/hirschberg.py
@ -17,7 +17,7 @@ from src.local.maths import *;

 __all__ = [
    'hirschberg_algorithm',
-    'hirschberg_algorithm_full',
+    'hirschberg_algorithm_once',
 ];

 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -40,36 +40,55 @@ def missmatch_penalty(x: str, y: str):
 # METHOD hirschberg_algorithm
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+def hirschberg_algorithm_once(
+    X: str,
+    Y: str,
+    verbose: bool = False,
+) -> Tuple[str, str]:
+    Costs, Moves = compute_cost_matrix(X = '-' + X, Y = '-' + Y);
+    path = reconstruct_optimal_path(Moves=Moves);
+    word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, moves=[Moves[coord] for coord in path], path=path);
+    if verbose:
+        repr = display_cost_matrix(Costs=Costs, path=path, X = '-' + X, Y = '-' + Y);
+        print(f'\n{repr}');
+        print(f'\n\x1b[1mOptimales Alignment:\x1b[0m');
+        print(word_y);
+        print(len(word_x) * '-');
+        print(word_x);
+        print('');
+    return word_x, word_y;
+
 def hirschberg_algorithm(
    X: str,
    Y: str,
    verbose: bool = False,
 ) -> Tuple[str, str]:
-    Costs, Moves = hirschberg_match_matrix(X = '-' + X, Y = '-' + Y);
-    path = reconstruct_optimal_path(Moves=Moves);
-    word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, Moves=Moves, path=path);
+    alignments_x, alignments_y = hirschberg_algorithm_step(X=X, Y=Y, depth=1, verbose=verbose);
+    word_x = ''.join(alignments_x);
+    word_y = ''.join(alignments_y);
    if verbose:
-        L = len(word_x);
-        costs_repr, moves_repr = display_cost_matrix(Costs=Costs, path=path, X = '-' + X, Y = '-' + Y);
+        display_x = '|'.join(alignments_x);
+        display_y = '|'.join(alignments_y);
+        print(f'\n\x1b[1mOptimales Alignment:\x1b[0m');
+        print(display_y);
+        print(len(display_x) * '-');
+        print(display_x);
        print('');
-        print('\x1b[1mAlignment:\x1b[0m');
-        print(f'  {word_y}');
-        print(f'  {L*"-"}');
-        print(f'  {word_x}');
-        print('');
-        print(costs_repr);
-        print('');
-        print(moves_repr);
    return word_x, word_y;

-def hirschberg_algorithm_full(
+def hirschberg_algorithm_step(
    X: str,
    Y: str,
    depth: int = 0,
    verbose: bool = False,
-) -> Tuple[str, str]:
+) -> Tuple[List[str], List[str]]:
    n = len(Y);
-    if n > 1:
+    if n == 1:
+        Costs, Moves = compute_cost_matrix(X = '-' + X, Y = '-' + Y);
+        path = reconstruct_optimal_path(Moves=Moves);
+        word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, moves=[Moves[coord] for coord in path], path=path);
+        return [word_x], [word_y];
+    else:
        n = int(np.ceil(n/2));

        # bilde linke Hälfte vom horizontalen Wort:
@ -81,20 +100,12 @@ def hirschberg_algorithm_full(
        X2 = X[::-1];

        # Löse Teilprobleme:
-        Costs1, Moves1 = hirschberg_match_matrix(X = '-' + X1, Y = '-' + Y1);
-        Costs2, Moves2 = hirschberg_match_matrix(X = '-' + X2, Y = '-' + Y2);
-        path1, path2 = reconstruct_optimal_path_halves(
-            Costs1=Costs1,
-            Costs2=Costs2,
-            Moves1=Moves1,
-            Moves2=Moves2,
-        );
-        word_x_1, word_y_1 = reconstruct_words(X = '-' + X1, Y = '-' + Y1, Moves=Moves1, path=path1);
-        word_x_2, word_y_2 = reconstruct_words(X = '-' + X2, Y = '-' + Y2, Moves=Moves2, path=path2);
+        Costs1, Moves1 = compute_cost_matrix(X = '-' + X1, Y = '-' + Y1);
+        Costs2, Moves2 = compute_cost_matrix(X = '-' + X2, Y = '-' + Y2);

        if verbose:
-            L = len(word_x_1) + len(word_x_2);
-            costs_repr, moves_repr = display_cost_matrix_halves(
+            path1, path2 = reconstruct_optimal_path_halves(Costs1=Costs1, Costs2=Costs2, Moves1=Moves1, Moves2=Moves2);
+            repr = display_cost_matrix_halves(
                Costs1 = Costs1,
                Costs2 = Costs2,
                path1  = path1,
@ -104,39 +115,28 @@ def hirschberg_algorithm_full(
                Y1     =  '-' + Y1,
                Y2     =  '-' + Y2,
            );
-            print('');
-            print(f'\x1b[1mRekursionstiefe: {depth}\x1b[0m')
-            print('');
-            print('\x1b[1mAlignment:\x1b[0m');
-            print(f'  {word_y_1} {word_y_2[::-1]}');
-            print(f'  {(L+1)*"-"}');
-            print(f'  {word_x_1} {word_x_2[::-1]}');
-            print('');
-            print(moves_repr);
+            print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{repr}')

-        coord = path1[-1];
-        m = coord[0];
-        word_x_1, word_y_1 = hirschberg_algorithm_full(X=X[:m], Y=Y[:n], depth=depth+1, verbose=True);
-        word_x_2, word_y_2 = hirschberg_algorithm_full(X=X[m:], Y=Y[n:], depth=depth+1, verbose=True);
-        word_x = word_x_1 + word_x_2;
-        word_y = word_y_1 + word_y_2;
-    else:
-        word_x, word_y = hirschberg_algorithm(X=X, Y=Y, verbose=False);
-    if depth == 0:
-        L = len(word_x);
-        print('');
-        print('\x1b[1mAlignment:\x1b[0m');
-        print(f'  {word_y}');
-        print(f'  {L*"-"}');
-        print(f'  {word_x}');
-        print('');
-    return word_x, word_y;
+        # Koordinaten des optimalen Übergangs berechnen:
+        coord1, coord2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2);
+        p = coord1[0];
+        # Divide and Conquer ausführen:
+        alignments_x_1, alignments_y_1 = hirschberg_algorithm_step(X=X[:p], Y=Y[:n], depth=depth+1, verbose=verbose);
+        alignments_x_2, alignments_y_2 = hirschberg_algorithm_step(X=X[p:], Y=Y[n:], depth=depth+1, verbose=verbose);
+        # Resultate zusammensetzen:
+        alignments_x = alignments_x_1 + alignments_x_2;
+        alignments_y = alignments_y_1 + alignments_y_2;
+        if len(Y[:n]) <= 1 and len(Y[n:]) <= 1:
+            # falls linke + rechte Hälfte nur aus <= 1 Buchstaben bestehen, besteht das Alignment aus nur einem Teil ---> führe zusammen:
+            alignments_x = [ ''.join(alignments_x) ];
+            alignments_y = [ ''.join(alignments_y) ];
+        return alignments_x, alignments_y;

 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # METHODS cost matrix + optimal paths
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-def hirschberg_match_matrix(
+def compute_cost_matrix(
    X: str,
    Y: str,
 ) -> Tuple[NDArray[(Any, Any), int], NDArray[(Any, Any), Directions]]:
@ -239,28 +239,36 @@ def update_cost_matrix(
        Moves[i, j], Costs[i, j] = edges[index];
    return;

-def reconstruct_words(
-    X: str,
-    Y: str,
-    Moves: NDArray[(Any, Any), Directions],
-    path: List[Tuple[int, int]],
-) -> Tuple[str, str]:
-    word_x = '';
-    word_y = '';
-    for (i, j) in path:
-        x = X[i];
-        y = Y[j];
-        match Moves[i, j]:
-            case Directions.DIAGONAL:
-                word_x += x;
-                word_y += y;
-            case Directions.HORIZONTAL:
-                word_x += '-';
-                word_y += y;
-            case Directions.VERTICAL:
-                word_x += x;
-                word_y += '-';
-    return word_x, word_y;
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# METHODS optimaler treffpunkt
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+def get_optimal_transition(
+    Costs1: NDArray[(Any, Any), int],
+    Costs2: NDArray[(Any, Any), int],
+) -> Tuple[Tuple[int, int], Tuple[int, int]]:
+    '''
+    Rekonstruiere »Treffpunkt«, wo die Gesamtkosten minimiert sind.
+    Dieser Punkt stellt einen optimalen Übergang für den Rekursionsschritt dar.
+    '''
+    (m, n1) = Costs1.shape;
+    (m, n2) = Costs2.shape;
+    info = [
+        (
+            Costs1[i, n1-1] + Costs2[m-1-i, n2-1],
+            (i, n1-1),
+            (m-1-i, n2-1),
+        )
+        for i in range(m)
+    ];
+    index = np.argmin([ cost for cost, _, _ in info ]);
+    coord1 = info[index][1];
+    coord2 = info[index][2];
+    return coord1, coord2;
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# METHODS reconstruction von words/paths
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 def reconstruct_optimal_path(
    Moves: NDArray[(Any, Any), Directions],
@ -295,21 +303,38 @@ def reconstruct_optimal_path_halves(
    Moves1: NDArray[(Any, Any), Directions],
    Moves2: NDArray[(Any, Any), Directions],
 ) -> Tuple[List[Tuple[int, int]], List[Tuple[int, int]]]:
-    (m, n1) = Costs1.shape;
-    (m, n2) = Costs2.shape;
-    info = [
-        (
-            Costs1[i, n1-1] + Costs2[m-1-i, n2-1],
-            (i, n1-1),
-            (m-1-i, n2-1),
-        )
-        for i in range(m)
-    ];
-    index = np.argmin([ cost for cost, _, _ in info ]);
-    path1 = reconstruct_optimal_path(Moves1, coord=info[index][1]);
-    path2 = reconstruct_optimal_path(Moves2, coord=info[index][2]);
+    '''
+    Rekonstruiere den optimalen Pfad für den Rekursionsschritt,
+    wenn horizontales Wort in 2 aufgeteilt wird.
+    '''
+    coord1, coord2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2);
+    path1 = reconstruct_optimal_path(Moves1, coord=coord1);
+    path2 = reconstruct_optimal_path(Moves2, coord=coord2);
    return path1, path2;

+def reconstruct_words(
+    X: str,
+    Y: str,
+    moves: List[Directions],
+    path: List[Tuple[int, int]],
+) -> Tuple[str, str]:
+    word_x = '';
+    word_y = '';
+    for ((i, j), move) in zip(path, moves):
+        x = X[i];
+        y = Y[j];
+        match move:
+            case Directions.DIAGONAL:
+                word_x += x;
+                word_y += y;
+            case Directions.HORIZONTAL:
+                word_x += '-';
+                word_y += y;
+            case Directions.VERTICAL:
+                word_x += x;
+                word_y += '-';
+    return word_x, word_y;
+
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # AUXILIARY METHODS
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -346,10 +371,10 @@ def represent_cost_matrix(
    table_costs = table.copy();
    table_moves = table.copy();
    table_costs[3:(3+m), 3:(3+n)] = Costs;
-    table_moves[3:(3+m), 3:(3+n)] = '.';
+    table_moves[3:(3+m), 3:(3+n)] = '·';
    for (i, j) in path:
        # table_costs[3 + i, 3 + j] = f'\x1b[92;1m{table_costs[3 + i, 3 + j]}\x1b[0m';
-        table_moves[3 + i, 3 + j] = '@';
+        table_moves[3 + i, 3 + j] = '*';

    return table_costs, table_moves;

@ -358,7 +383,7 @@ def display_cost_matrix(
    path: List[Tuple[int, int]],
    X: str,
    Y: str,
-) -> Tuple[str, str]:
+) -> str:
    '''
    Zeigt Kostenmatrix + optimalen Pfad.

@ -372,9 +397,13 @@ def display_cost_matrix(
    '''
    table_costs, table_moves = represent_cost_matrix(Costs=Costs, path=path, X=X, Y=Y);
    # benutze pandas-Dataframe, um schöner darzustellen:
+    h = table_costs.shape[0];
    costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False);
    moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False);
-    return costs_repr, moves_repr;
+    table = np.concatenate([table_costs, np.full(shape=(h, 1), dtype=object, fill_value='    '), table_moves], axis=1);
+
+    repr = pd.DataFrame(table).to_string(index=False, header=False);
+    return repr;

 def display_cost_matrix_halves(
    Costs1: NDArray[(Any, Any), int],
@ -385,7 +414,7 @@ def display_cost_matrix_halves(
    X2: str,
    Y1: str,
    Y2: str,
-) -> Tuple[str, str]:
+) -> str:
    '''
    Zeigt Kostenmatrix + optimalen Pfad für Schritt im D & C Hirschberg-Algorithmus

@ -401,11 +430,14 @@ def display_cost_matrix_halves(
    table_costs2, table_moves2 = represent_cost_matrix(Costs=Costs2, path=path2, X=X2, Y=Y2, pad=True);

    # merge Taellen:
+    h = table_costs1.shape[0];
    table_costs = np.concatenate([table_costs1, table_costs2[::-1, ::-1]], axis=1);
    table_moves = np.concatenate([table_moves1, table_moves2[::-1, ::-1]], axis=1);
+    table = np.concatenate([table_costs, np.full(shape=(h, 1), dtype=object, fill_value='    '), table_moves], axis=1);

    # benutze pandas-Dataframe, um schöner darzustellen:
-    costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False);
-    moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False);
-
-    return costs_repr, moves_repr;
+    # costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False);
+    # moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False);
+    # return costs_repr, moves_repr;
+    repr = pd.DataFrame(table).to_string(index=False, header=False);
+    return repr;