master > master: code py - hirschberg darstellungen verbessert

This commit is contained in:
RD 2022-06-09 08:48:09 +02:00
parent 14a882e9d3
commit 9c5b88b64d
2 changed files with 144 additions and 103 deletions

View File

@ -41,9 +41,18 @@ def enter():
# verbose=True,
# );
## Beispiel für Seminarwoche 10 (Blatt 9):
hirschberg_algorithm_full(
X = 'ACGAAG',
Y = 'AGAT',
# hirschberg_algorithm_once(
hirschberg_algorithm(
# Y = 'ANSPANNEN',
# X = 'ANSTRENGEN',
# Y = 'AGAT',
# X = 'ACGAAG',
# Y = 'apple',
X = 'happily',
Y = 'apple',
# X = 'happily',
# Y = 'nei wolle elli wien',
# X = 'nie will elli wein',
verbose = True,
);
return;

View File

@ -17,7 +17,7 @@ from src.local.maths import *;
__all__ = [
'hirschberg_algorithm',
'hirschberg_algorithm_full',
'hirschberg_algorithm_once',
];
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -40,36 +40,55 @@ def missmatch_penalty(x: str, y: str):
# METHOD hirschberg_algorithm
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def hirschberg_algorithm_once(
    X: str,
    Y: str,
    verbose: bool = False,
) -> Tuple[str, str]:
    '''
    Aligns two words in a single pass, i.e. without the divide-and-conquer recursion.

    Both words are prefixed with the placeholder '-' before they are handed to
    the cost-matrix, path and word-reconstruction helpers.

    @inputs
    - `X`, `Y` - the two words to be aligned.
    - `verbose` - if true, prints the rendered cost matrix followed by the alignment.

    @returns
    the pair `(word_x, word_y)` produced by `reconstruct_words`.
    '''
    padded_x = '-' + X
    padded_y = '-' + Y
    Costs, Moves = compute_cost_matrix(X=padded_x, Y=padded_y)
    path = reconstruct_optimal_path(Moves=Moves)
    # only the moves that lie on the optimal path are needed for the reconstruction
    moves_on_path = [Moves[coord] for coord in path]
    word_x, word_y = reconstruct_words(X=padded_x, Y=padded_y, moves=moves_on_path, path=path)
    if not verbose:
        return word_x, word_y
    table = display_cost_matrix(Costs=Costs, path=path, X=padded_x, Y=padded_y)
    print(f'\n{table}')
    print(f'\n\x1b[1mOptimales Alignment:\x1b[0m')
    print(word_y)
    # separator line with the same width as the aligned word
    print(len(word_x) * '-')
    print(word_x)
    print('')
    return word_x, word_y
def hirschberg_algorithm(
X: str,
Y: str,
verbose: bool = False,
) -> Tuple[str, str]:
Costs, Moves = hirschberg_match_matrix(X = '-' + X, Y = '-' + Y);
path = reconstruct_optimal_path(Moves=Moves);
word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, Moves=Moves, path=path);
alignments_x, alignments_y = hirschberg_algorithm_step(X=X, Y=Y, depth=1, verbose=verbose);
word_x = ''.join(alignments_x);
word_y = ''.join(alignments_y);
if verbose:
L = len(word_x);
costs_repr, moves_repr = display_cost_matrix(Costs=Costs, path=path, X = '-' + X, Y = '-' + Y);
display_x = '|'.join(alignments_x);
display_y = '|'.join(alignments_y);
print(f'\n\x1b[1mOptimales Alignment:\x1b[0m');
print(display_y);
print(len(display_x) * '-');
print(display_x);
print('');
print('\x1b[1mAlignment:\x1b[0m');
print(f' {word_y}');
print(f' {L*"-"}');
print(f' {word_x}');
print('');
print(costs_repr);
print('');
print(moves_repr);
return word_x, word_y;
def hirschberg_algorithm_full(
def hirschberg_algorithm_step(
X: str,
Y: str,
depth: int = 0,
verbose: bool = False,
) -> Tuple[str, str]:
) -> Tuple[List[str], List[str]]:
n = len(Y);
if n > 1:
if n == 1:
Costs, Moves = compute_cost_matrix(X = '-' + X, Y = '-' + Y);
path = reconstruct_optimal_path(Moves=Moves);
word_x, word_y = reconstruct_words(X = '-' + X, Y = '-' + Y, moves=[Moves[coord] for coord in path], path=path);
return [word_x], [word_y];
else:
n = int(np.ceil(n/2));
# bilde linke Hälfte vom horizontalen Wort:
@ -81,20 +100,12 @@ def hirschberg_algorithm_full(
X2 = X[::-1];
# Löse Teilprobleme:
Costs1, Moves1 = hirschberg_match_matrix(X = '-' + X1, Y = '-' + Y1);
Costs2, Moves2 = hirschberg_match_matrix(X = '-' + X2, Y = '-' + Y2);
path1, path2 = reconstruct_optimal_path_halves(
Costs1=Costs1,
Costs2=Costs2,
Moves1=Moves1,
Moves2=Moves2,
);
word_x_1, word_y_1 = reconstruct_words(X = '-' + X1, Y = '-' + Y1, Moves=Moves1, path=path1);
word_x_2, word_y_2 = reconstruct_words(X = '-' + X2, Y = '-' + Y2, Moves=Moves2, path=path2);
Costs1, Moves1 = compute_cost_matrix(X = '-' + X1, Y = '-' + Y1);
Costs2, Moves2 = compute_cost_matrix(X = '-' + X2, Y = '-' + Y2);
if verbose:
L = len(word_x_1) + len(word_x_2);
costs_repr, moves_repr = display_cost_matrix_halves(
path1, path2 = reconstruct_optimal_path_halves(Costs1=Costs1, Costs2=Costs2, Moves1=Moves1, Moves2=Moves2);
repr = display_cost_matrix_halves(
Costs1 = Costs1,
Costs2 = Costs2,
path1 = path1,
@ -104,39 +115,28 @@ def hirschberg_algorithm_full(
Y1 = '-' + Y1,
Y2 = '-' + Y2,
);
print('');
print(f'\x1b[1mRekursionstiefe: {depth}\x1b[0m')
print('');
print('\x1b[1mAlignment:\x1b[0m');
print(f' {word_y_1} {word_y_2[::-1]}');
print(f' {(L+1)*"-"}');
print(f' {word_x_1} {word_x_2[::-1]}');
print('');
print(moves_repr);
print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{repr}')
coord = path1[-1];
m = coord[0];
word_x_1, word_y_1 = hirschberg_algorithm_full(X=X[:m], Y=Y[:n], depth=depth+1, verbose=True);
word_x_2, word_y_2 = hirschberg_algorithm_full(X=X[m:], Y=Y[n:], depth=depth+1, verbose=True);
word_x = word_x_1 + word_x_2;
word_y = word_y_1 + word_y_2;
else:
word_x, word_y = hirschberg_algorithm(X=X, Y=Y, verbose=False);
if depth == 0:
L = len(word_x);
print('');
print('\x1b[1mAlignment:\x1b[0m');
print(f' {word_y}');
print(f' {L*"-"}');
print(f' {word_x}');
print('');
return word_x, word_y;
# Koordinaten des optimalen Übergangs berechnen:
coord1, coord2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2);
p = coord1[0];
# Divide and Conquer ausführen:
alignments_x_1, alignments_y_1 = hirschberg_algorithm_step(X=X[:p], Y=Y[:n], depth=depth+1, verbose=verbose);
alignments_x_2, alignments_y_2 = hirschberg_algorithm_step(X=X[p:], Y=Y[n:], depth=depth+1, verbose=verbose);
# Resultate zusammensetzen:
alignments_x = alignments_x_1 + alignments_x_2;
alignments_y = alignments_y_1 + alignments_y_2;
if len(Y[:n]) <= 1 and len(Y[n:]) <= 1:
# falls linke + rechte Hälfte nur aus <= 1 Buchstaben bestehen, besteht das Alignment aus nur einem Teil ---> führe zusammen:
alignments_x = [ ''.join(alignments_x) ];
alignments_y = [ ''.join(alignments_y) ];
return alignments_x, alignments_y;
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# METHODS cost matrix + optimal paths
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def hirschberg_match_matrix(
def compute_cost_matrix(
X: str,
Y: str,
) -> Tuple[NDArray[(Any, Any), int], NDArray[(Any, Any), Directions]]:
@ -239,28 +239,36 @@ def update_cost_matrix(
Moves[i, j], Costs[i, j] = edges[index];
return;
def reconstruct_words(
    X: str,
    Y: str,
    Moves: NDArray[(Any, Any), Directions],
    path: List[Tuple[int, int]],
) -> Tuple[str, str]:
    '''
    Builds the two aligned words by walking along `path`.

    For every coordinate `(i, j)` on the path the move stored in `Moves[i, j]`
    decides what is appended:
    - DIAGONAL: the letters `X[i]` and `Y[j]`;
    - HORIZONTAL: a gap '-' for X and the letter `Y[j]`;
    - VERTICAL: the letter `X[i]` and a gap '-' for Y.
    Any other move contributes nothing.

    @returns
    the pair `(word_x, word_y)`.
    '''
    letters_x = []
    letters_y = []
    for (i, j) in path:
        x = X[i]
        y = Y[j]
        move = Moves[i, j]
        if move == Directions.DIAGONAL:
            letters_x.append(x)
            letters_y.append(y)
        elif move == Directions.HORIZONTAL:
            letters_x.append('-')
            letters_y.append(y)
        elif move == Directions.VERTICAL:
            letters_x.append(x)
            letters_y.append('-')
    return ''.join(letters_x), ''.join(letters_y)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# METHODS optimaler treffpunkt
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def get_optimal_transition(
    Costs1: NDArray[(Any, Any), int],
    Costs2: NDArray[(Any, Any), int],
) -> Tuple[Tuple[int, int], Tuple[int, int]]:
    '''
    Finds the »meeting point« at which the combined cost of both halves is minimal.

    Row `i` of the last column of `Costs1` is paired with row `m-1-i` of the
    last column of `Costs2`, where `m` is the number of rows of `Costs2`.
    The first pairing with the smallest sum wins.
    NOTE(review): `Costs1` is presumably expected to have the same number of rows - confirm against callers.

    @returns
    the coordinates `(i, n1-1)` in `Costs1` and `(m-1-i, n2-1)` in `Costs2` of the chosen pairing.
    '''
    _, width_1 = Costs1.shape
    height, width_2 = Costs2.shape
    col_1 = width_1 - 1
    col_2 = width_2 - 1
    # total cost of meeting in row i (left matrix) / mirrored row (right matrix)
    totals = [
        Costs1[i, col_1] + Costs2[height - 1 - i, col_2]
        for i in range(height)
    ]
    best = int(np.argmin(totals))
    return (best, col_1), (height - 1 - best, col_2)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# METHODS reconstruction von words/paths
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def reconstruct_optimal_path(
Moves: NDArray[(Any, Any), Directions],
@ -295,21 +303,38 @@ def reconstruct_optimal_path_halves(
Moves1: NDArray[(Any, Any), Directions],
Moves2: NDArray[(Any, Any), Directions],
) -> Tuple[List[Tuple[int, int]], List[Tuple[int, int]]]:
(m, n1) = Costs1.shape;
(m, n2) = Costs2.shape;
info = [
(
Costs1[i, n1-1] + Costs2[m-1-i, n2-1],
(i, n1-1),
(m-1-i, n2-1),
)
for i in range(m)
];
index = np.argmin([ cost for cost, _, _ in info ]);
path1 = reconstruct_optimal_path(Moves1, coord=info[index][1]);
path2 = reconstruct_optimal_path(Moves2, coord=info[index][2]);
'''
Rekonstruiere optimalen Pfad für Rekursionsschritt,
wenn horizontales Wort in 2 aufgeteilt wird.
'''
coord1, coord2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2);
path1 = reconstruct_optimal_path(Moves1, coord=coord1);
path2 = reconstruct_optimal_path(Moves2, coord=coord2);
return path1, path2;
def reconstruct_words(
    X: str,
    Y: str,
    moves: List[Directions],
    path: List[Tuple[int, int]],
) -> Tuple[str, str]:
    '''
    Builds the two aligned words from a path and the moves taken along it.

    `path` and `moves` are traversed in lockstep. For a coordinate `(i, j)`:
    - DIAGONAL contributes the letters `X[i]` / `Y[j]`;
    - HORIZONTAL contributes a gap '-' / the letter `Y[j]`;
    - VERTICAL contributes the letter `X[i]` / a gap '-'.
    Any other move contributes nothing.

    @returns
    the pair `(word_x, word_y)`.
    '''
    aligned_x = []
    aligned_y = []
    for ((row, col), step) in zip(path, moves):
        char_x = X[row]
        char_y = Y[col]
        if step == Directions.DIAGONAL:
            pair = (char_x, char_y)
        elif step == Directions.HORIZONTAL:
            pair = ('-', char_y)
        elif step == Directions.VERTICAL:
            pair = (char_x, '-')
        else:
            # unknown move: nothing is emitted for this coordinate
            continue
        aligned_x.append(pair[0])
        aligned_y.append(pair[1])
    return ''.join(aligned_x), ''.join(aligned_y)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# AUXILIARY METHODS
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -346,10 +371,10 @@ def represent_cost_matrix(
table_costs = table.copy();
table_moves = table.copy();
table_costs[3:(3+m), 3:(3+n)] = Costs;
table_moves[3:(3+m), 3:(3+n)] = '.';
table_moves[3:(3+m), 3:(3+n)] = '·';
for (i, j) in path:
# table_costs[3 + i, 3 + j] = f'\x1b[92;1m{table_costs[3 + i, 3 + j]}\x1b[0m';
table_moves[3 + i, 3 + j] = '@';
table_moves[3 + i, 3 + j] = '*';
return table_costs, table_moves;
@ -358,7 +383,7 @@ def display_cost_matrix(
path: List[Tuple[int, int]],
X: str,
Y: str,
) -> Tuple[str, str]:
) -> str:
'''
Zeigt Kostenmatrix + optimalen Pfad.
@ -372,9 +397,13 @@ def display_cost_matrix(
'''
table_costs, table_moves = represent_cost_matrix(Costs=Costs, path=path, X=X, Y=Y);
# benutze pandas-Dataframe, um schöner darzustellen:
h = table_costs.shape[0];
costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False);
moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False);
return costs_repr, moves_repr;
table = np.concatenate([table_costs, np.full(shape=(h, 1), dtype=object, fill_value=' '), table_moves], axis=1);
repr = pd.DataFrame(table).to_string(index=False, header=False);
return repr;
def display_cost_matrix_halves(
Costs1: NDArray[(Any, Any), int],
@ -385,7 +414,7 @@ def display_cost_matrix_halves(
X2: str,
Y1: str,
Y2: str,
) -> Tuple[str, str]:
) -> str:
'''
Zeigt Kostenmatrix + optimalen Pfad für Schritt im D & C Hirschberg-Algorithmus
@ -401,11 +430,14 @@ def display_cost_matrix_halves(
table_costs2, table_moves2 = represent_cost_matrix(Costs=Costs2, path=path2, X=X2, Y=Y2, pad=True);
# merge Tabellen:
h = table_costs1.shape[0];
table_costs = np.concatenate([table_costs1, table_costs2[::-1, ::-1]], axis=1);
table_moves = np.concatenate([table_moves1, table_moves2[::-1, ::-1]], axis=1);
table = np.concatenate([table_costs, np.full(shape=(h, 1), dtype=object, fill_value=' '), table_moves], axis=1);
# benutze pandas-Dataframe, um schöner darzustellen:
costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False);
moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False);
return costs_repr, moves_repr;
# costs_repr = pd.DataFrame(table_costs).to_string(index=False, header=False);
# moves_repr = pd.DataFrame(table_moves).to_string(index=False, header=False);
# return costs_repr, moves_repr;
repr = pd.DataFrame(table).to_string(index=False, header=False);
return repr;