From 9c5b88b64d4aa47d38f9f4badc99c033d9c46788 Mon Sep 17 00:00:00 2001 From: raj_mathe Date: Thu, 9 Jun 2022 08:48:09 +0200 Subject: [PATCH] master > master: code py - hirschberg darstellungen verbessert --- code/python/src/main.py | 15 +- .../python/src/string_alignment/hirschberg.py | 232 ++++++++++-------- 2 files changed, 144 insertions(+), 103 deletions(-) diff --git a/code/python/src/main.py b/code/python/src/main.py index 10e90b6..5de027c 100644 --- a/code/python/src/main.py +++ b/code/python/src/main.py @@ -41,9 +41,18 @@ def enter(): # verbose=True, # ); ## Beispiel für Seminarwoche 10 (Blatt 9): - hirschberg_algorithm_full( - X = 'ACGAAG', - Y = 'AGAT', + # hirschberg_algorithm_once( + hirschberg_algorithm( + # Y = 'ANSPANNEN', + # X = 'ANSTRENGEN', + # Y = 'AGAT', + # X = 'ACGAAG', + # Y = 'apple', + X = 'happily', + Y = 'apple', + # X = 'happily', + # Y = 'nei wolle elli wien', + # X = 'nie will elli wein', verbose = True, ); return; diff --git a/code/python/src/string_alignment/hirschberg.py b/code/python/src/string_alignment/hirschberg.py index 71362b4..13843a8 100644 --- a/code/python/src/string_alignment/hirschberg.py +++ b/code/python/src/string_alignment/hirschberg.py @@ -17,7 +17,7 @@ from src.local.maths import *; __all__ = [ 'hirschberg_algorithm', - 'hirschberg_algorithm_full', + 'hirschberg_algorithm_once', ]; # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -40,36 +40,55 @@ def missmatch_penalty(x: str, y: str): # METHOD hirschberg_algorithm # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def hirschberg_algorithm_once( + X: str, + Y: str, + verbose: bool = False, +) -> Tuple[str, str]: + Costs, Moves = compute_cost_matrix(X = '-' + X, Y = '-' + Y); + path = reconstruct_optimal_path(Moves=Moves); + word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, moves=[Moves[coord] for coord in path], path=path); + if verbose: + repr = display_cost_matrix(Costs=Costs, path=path, X = '-' + X, Y = '-' + Y); + print(f'\n{repr}'); + print(f'\n\x1b[1mOptimales Alignment:\x1b[0m'); + print(word_y); + print(len(word_x) * '-'); + print(word_x); + print(''); + return word_x, word_y; + def hirschberg_algorithm( X: str, Y: str, verbose: bool = False, ) -> Tuple[str, str]: - Costs, Moves = hirschberg_match_matrix(X = '-' + X, Y = '-' + Y); - path = reconstruct_optimal_path(Moves=Moves); - word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, Moves=Moves, path=path); + alignments_x, alignments_y = hirschberg_algorithm_step(X=X, Y=Y, depth=1, verbose=verbose); + word_x = ''.join(alignments_x); + word_y = ''.join(alignments_y); if verbose: - L = len(word_x); - costs_repr, moves_repr = display_cost_matrix(Costs=Costs, path=path, X = '-' + X, Y = '-' + Y); + display_x = '|'.join(alignments_x); + display_y = '|'.join(alignments_y); + print(f'\n\x1b[1mOptimales Alignment:\x1b[0m'); + print(display_y); + print(len(display_x) * '-'); + print(display_x); print(''); - print('\x1b[1mAlignment:\x1b[0m'); - print(f' {word_y}'); - print(f' {L*"-"}'); - print(f' {word_x}'); - print(''); - print(costs_repr); - print(''); - print(moves_repr); return word_x, word_y; -def hirschberg_algorithm_full( +def hirschberg_algorithm_step( X: str, Y: str, depth: int = 0, verbose: bool = False, -) -> Tuple[str, str]: +) -> Tuple[List[str], List[str]]: n = len(Y); - if n > 1: + if n == 1: + Costs, Moves = compute_cost_matrix(X = '-' + X, Y = '-' + Y); + path = reconstruct_optimal_path(Moves=Moves); + word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, moves=[Moves[coord] for coord in path], path=path); + return [word_x], [word_y]; + else: n = int(np.ceil(n/2)); # bilde linke Hälfte vom horizontalen Wort: @@ -81,20 +100,12 @@ def hirschberg_algorithm_full( X2 = X[::-1]; # Löse Teilprobleme: - Costs1, Moves1 = hirschberg_match_matrix(X = '-' + X1, Y = '-' + Y1); - Costs2, Moves2 = hirschberg_match_matrix(X = '-' + X2, Y = '-' + Y2); - path1, path2 = reconstruct_optimal_path_halves( - Costs1=Costs1, - Costs2=Costs2, - Moves1=Moves1, - Moves2=Moves2, - ); - word_x_1, word_y_1 = reconstruct_words(X = '-' + X1, Y = '-' + Y1, Moves=Moves1, path=path1); - word_x_2, word_y_2 = reconstruct_words(X = '-' + X2, Y = '-' + Y2, Moves=Moves2, path=path2); + Costs1, Moves1 = compute_cost_matrix(X = '-' + X1, Y = '-' + Y1); + Costs2, Moves2 = compute_cost_matrix(X = '-' + X2, Y = '-' + Y2); if verbose: - L = len(word_x_1) + len(word_x_2); - costs_repr, moves_repr = display_cost_matrix_halves( + path1, path2 = reconstruct_optimal_path_halves(Costs1=Costs1, Costs2=Costs2, Moves1=Moves1, Moves2=Moves2); + repr = display_cost_matrix_halves( Costs1 = Costs1, Costs2 = Costs2, path1 = path1, @@ -104,39 +115,28 @@ def hirschberg_algorithm_full( Y1 = '-' + Y1, Y2 = '-' + Y2, ); - print(''); - print(f'\x1b[1mRekursionstiefe: {depth}\x1b[0m') - print(''); - print('\x1b[1mAlignment:\x1b[0m'); - print(f' {word_y_1} {word_y_2[::-1]}'); - print(f' {(L+1)*"-"}'); - print(f' {word_x_1} {word_x_2[::-1]}'); - print(''); - print(moves_repr); + print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{repr}') - coord = path1[-1]; - m = coord[0]; - word_x_1, word_y_1 = hirschberg_algorithm_full(X=X[:m], Y=Y[:n], depth=depth+1, verbose=True); - word_x_2, word_y_2 = hirschberg_algorithm_full(X=X[m:], Y=Y[n:], depth=depth+1, verbose=True); - word_x = word_x_1 + word_x_2; - word_y = word_y_1 + word_y_2; - else: - word_x, word_y = hirschberg_algorithm(X=X, Y=Y, verbose=False); - if depth == 0: - L = len(word_x); - print(''); - print('\x1b[1mAlignment:\x1b[0m'); - print(f' {word_y}'); - print(f' {L*"-"}'); - print(f' {word_x}'); - print(''); - return word_x, word_y; + # Koordinaten des optimalen Übergangs berechnen: + coord1, coord2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2); + p = coord1[0]; + # Divide and Conquer ausführen: + alignments_x_1, alignments_y_1 = hirschberg_algorithm_step(X=X[:p], Y=Y[:n], depth=depth+1, verbose=verbose); + alignments_x_2, alignments_y_2 = hirschberg_algorithm_step(X=X[p:], Y=Y[n:], depth=depth+1, verbose=verbose); + # Resultate zusammensetzen: + alignments_x = alignments_x_1 + alignments_x_2; + alignments_y = alignments_y_1 + alignments_y_2; + if len(Y[:n]) <= 1 and len(Y[n:]) <= 1: + # falls linke + rechte Hälfte nur aus <= 1 Buchstsaben bestehen, bestehen Alignment aus nur einem Teil ---> führe zusammen: + alignments_x = [ ''.join(alignments_x) ]; + alignments_y = [ ''.join(alignments_y) ]; + return alignments_x, alignments_y; # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # METHODS cost matrix + optimal paths # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -def hirschberg_match_matrix( +def compute_cost_matrix( X: str, Y: str, ) -> Tuple[NDArray[(Any, Any), int], NDArray[(Any, Any), Directions]]: @@ -239,28 +239,36 @@ def update_cost_matrix( Moves[i, j], Costs[i, j] = edges[index]; return; -def reconstruct_words( - X: str, - Y: str, - Moves: NDArray[(Any, Any), Directions], - path: List[Tuple[int, int]], -) -> Tuple[str, str]: - word_x = ''; - word_y = ''; - for (i, j) in path: - x = X[i]; - y = Y[j]; - match Moves[i, j]: - case Directions.DIAGONAL: - word_x += x; - word_y += y; - case Directions.HORIZONTAL: - word_x += '-'; - word_y += y; - case Directions.VERTICAL: - word_x += x; - word_y += '-'; - return word_x, word_y; +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# METHODS optimaler treffpunkt +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +def get_optimal_transition( + Costs1: NDArray[(Any, Any), int], + Costs2: NDArray[(Any, Any), int], +) -> Tuple[Tuple[int, int], Tuple[int, int]]: + ''' + Rekonstruiere »Treffpunkt«, wo die Gesamtkosten minimiert sind. + Dieser Punkt stellt einen optimal Übergang für den Rekursionsschritt dar. + ''' + (m, n1) = Costs1.shape; + (m, n2) = Costs2.shape; + info = [ + ( + Costs1[i, n1-1] + Costs2[m-1-i, n2-1], + (i, n1-1), + (m-1-i, n2-1), + ) + for i in range(m) + ]; + index = np.argmin([ cost for cost, _, _ in info ]); + coord1 = info[index][1]; + coord2 = info[index][2]; + return coord1, coord2; + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# METHODS reconstruction von words/paths +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def reconstruct_optimal_path( Moves: NDArray[(Any, Any), Directions], @@ -295,21 +303,38 @@ def reconstruct_optimal_path_halves( Moves1: NDArray[(Any, Any), Directions], Moves2: NDArray[(Any, Any), Directions], ) -> Tuple[List[Tuple[int, int]], List[Tuple[int, int]]]: - (m, n1) = Costs1.shape; - (m, n2) = Costs2.shape; - info = [ - ( - Costs1[i, n1-1] + Costs2[m-1-i, n2-1], - (i, n1-1), - (m-1-i, n2-1), - ) - for i in range(m) - ]; - index = np.argmin([ cost for cost, _, _ in info ]); - path1 = reconstruct_optimal_path(Moves1, coord=info[index][1]); - path2 = reconstruct_optimal_path(Moves2, coord=info[index][2]); + ''' + Rekonstruiere optimale Pfad für Rekursionsschritt, + wenn horizontales Wort in 2 aufgeteilt wird. + ''' + coord1, coord2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2); + path1 = reconstruct_optimal_path(Moves1, coord=coord1); + path2 = reconstruct_optimal_path(Moves2, coord=coord2); return path1, path2; +def reconstruct_words( + X: str, + Y: str, + moves: List[Directions], + path: List[Tuple[int, int]], +) -> Tuple[str, str]: + word_x = ''; + word_y = ''; + for ((i, j), move) in zip(path, moves): + x = X[i]; + y = Y[j]; + match move: + case Directions.DIAGONAL: + word_x += x; + word_y += y; + case Directions.HORIZONTAL: + word_x += '-'; + word_y += y; + case Directions.VERTICAL: + word_x += x; + word_y += '-'; + return word_x, word_y; + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # AUXILIARY METHODS # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -346,10 +371,10 @@ def represent_cost_matrix( table_costs = table.copy(); table_moves = table.copy(); table_costs[3:(3+m), 3:(3+n)] = Costs; - table_moves[3:(3+m), 3:(3+n)] = '.'; + table_moves[3:(3+m), 3:(3+n)] = '·'; for (i, j) in path: # table_costs[3 + i, 3 + j] = f'\x1b[92;1m{table_costs[3 + i, 3 + j]}\x1b[0m'; - table_moves[3 + i, 3 + j] = '@'; + table_moves[3 + i, 3 + j] = '*'; return table_costs, table_moves; @@ -358,7 +383,7 @@ def display_cost_matrix( path: List[Tuple[int, int]], X: str, Y: str, -) -> Tuple[str, str]: +) -> str: ''' Zeigt Kostenmatrix + optimalen Pfad. @@ -372,9 +397,13 @@ def display_cost_matrix( ''' table_costs, table_moves = represent_cost_matrix(Costs=Costs, path=path, X=X, Y=Y); # benutze pandas-Dataframe, um schöner darzustellen: + h = table_costs.shape[0]; costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False); moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False); - return costs_repr, moves_repr; + table = np.concatenate([table_costs, np.full(shape=(h, 1), dtype=object, fill_value=' '), table_moves], axis=1); + + repr = pd.DataFrame(table).to_string(index=False, header=False); + return repr; def display_cost_matrix_halves( Costs1: NDArray[(Any, Any), int], @@ -385,7 +414,7 @@ def display_cost_matrix_halves( X2: str, Y1: str, Y2: str, -) -> Tuple[str, str]: +) -> str: ''' Zeigt Kostenmatrix + optimalen Pfad für Schritt im D & C Hirschberg-Algorithmus @@ -401,11 +430,14 @@ def display_cost_matrix_halves( table_costs2, table_moves2 = represent_cost_matrix(Costs=Costs2, path=path2, X=X2, Y=Y2, pad=True); # merge Taellen: + h = table_costs1.shape[0]; table_costs = np.concatenate([table_costs1, table_costs2[::-1, ::-1]], axis=1); table_moves = np.concatenate([table_moves1, table_moves2[::-1, ::-1]], axis=1); + table = np.concatenate([table_costs, np.full(shape=(h, 1), dtype=object, fill_value=' '), table_moves], axis=1); # benutze pandas-Dataframe, um schöner darzustellen: - costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False); - moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False); - - return costs_repr, moves_repr; + # costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False); + # moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False); + # return costs_repr, moves_repr; + repr = pd.DataFrame(table).to_string(index=False, header=False); + return repr;