master > master: code py - refactoring
- Umbenennungen - Verbesserungen der Darstellungen - Enums zur besseren Steuerung der versch. Modi - Refactoring des Algorithmus - Verwendung von tabulate
This commit is contained in:
parent
57bc1e68e6
commit
760bff11f2
@ -7,7 +7,7 @@
|
||||
|
||||
from __future__ import annotations;
|
||||
|
||||
from src.local.typing import *;
|
||||
from src.thirdparty.types import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
|
@ -6,12 +6,8 @@
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from __future__ import annotations;
|
||||
from enum import Enum;
|
||||
from dataclasses import dataclass;
|
||||
from dataclasses import field
|
||||
from platform import node;
|
||||
|
||||
from src.local.typing import *;
|
||||
from src.thirdparty.types import *;
|
||||
|
||||
from src.core.log import *;
|
||||
from src.stacks.stack import *;
|
||||
|
20
code/python/src/hirschberg/__init__.py
Normal file
20
code/python/src/hirschberg/__init__.py
Normal file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from src.hirschberg.algorithms import *;
|
||||
from src.hirschberg.constants import *;
|
||||
from src.hirschberg.display import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'hirschberg_algorithm',
|
||||
'VerboseMode',
|
||||
'DisplayOptions',
|
||||
];
|
152
code/python/src/hirschberg/algorithms.py
Normal file
152
code/python/src/hirschberg/algorithms.py
Normal file
@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from src.thirdparty.types import *;
|
||||
from src.thirdparty.maths import *;
|
||||
|
||||
from src.hirschberg.constants import *;
|
||||
from src.hirschberg.display import *;
|
||||
from src.hirschberg.matrix import *;
|
||||
from src.hirschberg.paths import *;
|
||||
from src.hirschberg.types import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'hirschberg_algorithm',
|
||||
'simple_algorithm',
|
||||
];
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHOD hirschberg_algorithm
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def simple_algorithm(
    X: str,
    Y: str,
    verb: VerboseMode = VerboseMode.NONE,
    show: List[DisplayOptions] = [],
) -> Tuple[str, str]:
    '''
    Computes the edit distances and the optimal directions exactly once
    and derives an optimal alignment directly from them.

    @inputs
    - `X`, `Y` - the two words to be aligned
    - `verb` - if not `VerboseMode.NONE`, the cost matrix and the alignment are printed
    - `show` - not used by this function

    @returns
    - the pair `(word_x, word_y)` of aligned words.
    '''
    # both words are prefixed by a gap symbol before the matrix is computed:
    padded_x = '-' + X;
    padded_y = '-' + Y;

    Costs, Moves = compute_cost_matrix(X=padded_x, Y=padded_y);
    path = reconstruct_optimal_path(Moves=Moves);
    moves = [Moves[coord] for coord in path];
    word_x, word_y = reconstruct_words(X=padded_x, Y=padded_y, moves=moves, path=path);

    # nothing to display in silent mode:
    if verb == VerboseMode.NONE:
        return word_x, word_y;

    matrix_repr = display_cost_matrix(Costs=Costs, path=path, X=padded_x, Y=padded_y, verb=verb);
    rule = '-' * len(word_x);
    alignment = f'{word_y}\n{rule}\n{word_x}';
    print(f'\n{matrix_repr}\n\n\x1b[1mOptimales Alignment:\x1b[0m\n\n{alignment}\n');
    return word_x, word_y;
|
||||
|
||||
def hirschberg_algorithm(
    X: str,
    Y: str,
    once: bool = False,
    verb: VerboseMode = VerboseMode.NONE,
    show: List[DisplayOptions] = [],
) -> Tuple[str, str]:
    '''
    The Hirschberg algorithm only computes the edit distances (cost matrix);
    it neither stores nor computes the matrix of optimal directions.

    This is more space-efficient than the simple method.

    Recursion achieves a kind of traceback through the underlying DP,
    from which an optimal alignment is obtained directly.
    Divide and conquer also keeps the time cost small.

    @inputs
    - `X`, `Y` - the two words to be aligned
    - `once` - if true, only the simple (single pass) algorithm is run
    - `verb` - if not `VerboseMode.NONE`, the resulting alignment is printed
    - `show` - if it contains `DisplayOptions.TREE`, the alignment is printed as a tree

    @returns
    - the pair `(word_x, word_y)` of aligned words.
    '''
    # optionally run only the simple algorithm:
    if once:
        return simple_algorithm(X=X, Y=Y, verb=verb, show=show);

    alignment = hirschberg_algorithm_step(X=X, Y=Y, depth=1, verb=verb, show=show);
    result = (alignment.as_string1(), alignment.as_string2());

    # verbose output is handled here (irrelevant for the algorithm):
    if verb != VerboseMode.NONE:
        if DisplayOptions.TREE in show:
            rendering = alignment.astree(braces=True);
        else:
            bracketed_x = alignment.as_string1(braces=True);
            bracketed_y = alignment.as_string2(braces=True);
            rule = '-' * len(bracketed_x);
            rendering = f'{bracketed_y}\n{rule}\n{bracketed_x}';
        print(f'\n\x1b[1mOptimales Alignment:\x1b[0m\n\n{rendering}\n');

    return result;
|
||||
|
||||
def hirschberg_algorithm_step(
    X: str,
    Y: str,
    depth: int = 0,
    verb: VerboseMode = VerboseMode.NONE,
    show: List[DisplayOptions] = [],
) -> Alignment:
    '''
    The recursive step of the Hirschberg algorithm splits one of the words in two
    and determines a matching split of the second word which minimises the edit distance.

    This yields one point of the optimal path through the cost matrix
    as well as a division of the problem into a left and a right half.

    @inputs
    - `X` - the word displayed vertically; it is split at the optimal transition
    - `Y` - the word displayed horizontally; it is split in the middle
    - `depth` - current recursion depth (only used for verbose output)
    - `verb` - if not `VerboseMode.NONE`, the cost matrices of each step are printed
    - `show` - if it contains `DisplayOptions.ATOMS`, base cases are printed too

    @returns
    - an `AlignmentBasic` in the base case, otherwise an `AlignmentPair`
      of the alignments of the left and right halves.
    '''
    n = len(Y);

    # Base case. An empty `Y` (n == 0) has to terminate here as well:
    # splitting it yields two empty halves, so the recursion would never end.
    if n <= 1:
        padded_x = '-' + X;
        padded_y = '-' + Y;
        Costs, Moves = compute_cost_matrix(X=padded_x, Y=padded_y);
        path = reconstruct_optimal_path(Moves=Moves);
        word_x, word_y = reconstruct_words(
            X = padded_x,
            Y = padded_y,
            moves = [Moves[coord] for coord in path],
            path = path,
        );

        # verbose output is handled here (irrelevant for the algorithm):
        if verb != VerboseMode.NONE and (DisplayOptions.ATOMS in show):
            matrix_repr = display_cost_matrix(Costs=Costs, path=path, X=padded_x, Y=padded_y, verb=verb);
            print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{matrix_repr}')

        return AlignmentBasic(word1=word_x, word2=word_y);

    # from here on n >= 2, so both halves of Y are non-empty and strictly shorter:
    n = int(np.ceil(n/2));

    # left half of the horizontal word:
    Y1 = Y[:n];
    X1 = X;

    # right half of the horizontal word (horizontal and vertical words are reversed):
    Y2 = Y[n:][::-1];
    X2 = X[::-1];

    # solve the sub-problems:
    Costs1, Moves1 = compute_cost_matrix(X = '-' + X1, Y = '-' + Y1);
    Costs2, Moves2 = compute_cost_matrix(X = '-' + X2, Y = '-' + Y2);

    # verbose output is handled here (irrelevant for the algorithm):
    if verb != VerboseMode.NONE:
        path1, path2 = reconstruct_optimal_path_halves(Costs1=Costs1, Costs2=Costs2, Moves1=Moves1, Moves2=Moves2);
        matrix_repr = display_cost_matrix_halves(
            Costs1 = Costs1,
            Costs2 = Costs2,
            path1 = path1,
            path2 = path2,
            X1 = '-' + X1,
            X2 = '-' + X2,
            Y1 = '-' + Y1,
            Y2 = '-' + Y2,
            verb = verb,
        );
        print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{matrix_repr}')

    # coordinates of the optimal transition; its row index is the split position of X:
    coord1, coord2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2);
    p = coord1[0];

    # divide and conquer:
    align_left = hirschberg_algorithm_step(X=X[:p], Y=Y[:n], depth=depth+1, verb=verb, show=show);
    align_right = hirschberg_algorithm_step(X=X[p:], Y=Y[n:], depth=depth+1, verb=verb, show=show);

    # assemble the results:
    return AlignmentPair(left=align_left, right=align_right);
|
51
code/python/src/hirschberg/constants.py
Normal file
51
code/python/src/hirschberg/constants.py
Normal file
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from src.thirdparty.types import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'VerboseMode',
|
||||
'DisplayOptions',
|
||||
'Directions',
|
||||
'gap_penalty',
|
||||
'missmatch_penalty',
|
||||
];
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# ENUMS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
class VerboseMode(Enum):
    '''
    Selects what is shown in the printed cost matrix.
    '''
    # no verbose output at all
    NONE = -1;
    # cells show the cost values
    COSTS = 0;
    # cells show `.`, cells on the optimal path show `*`
    MOVES = 1;
    # cells show the cost values, cells on the optimal path are highlighted
    COSTS_AND_MOVES = 2;
|
||||
|
||||
class DisplayOptions(Enum):
    '''
    Optional extras for the verbose output of the Hirschberg algorithm.
    '''
    # print the final alignment as a tree instead of two bracketed lines
    TREE = 0;
    # also print the cost matrix of the base cases of the recursion
    ATOMS = 1;
|
||||
|
||||
class Directions(Enum):
    '''
    Direction of the step by which a cell of the cost matrix is reached.

    The values double as priorities: among steps of equal cost
    the one with the smallest value is chosen.
    '''
    # initial value of every cell of the moves matrix
    UNSET = -1;
    # set the priorities here
    DIAGONAL = 0;
    HORIZONTAL = 1;
    VERTICAL = 2;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# PENALTIES
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def gap_penalty(x: str) -> int:
    '''
    Cost of aligning the character `x` with a gap.

    The cost is the constant 1, independently of `x`.
    '''
    return 1;
|
||||
|
||||
def missmatch_penalty(x: str, y: str):
    '''
    Cost of aligning the character `x` with the character `y`:
    0 if both are equal, otherwise 1.
    '''
    if x == y:
        return 0;
    return 1;
|
123
code/python/src/hirschberg/display.py
Normal file
123
code/python/src/hirschberg/display.py
Normal file
@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from src.thirdparty.types import *;
|
||||
from src.thirdparty.maths import *;
|
||||
|
||||
from src.hirschberg.constants import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'represent_cost_matrix',
|
||||
'display_cost_matrix',
|
||||
'display_cost_matrix_halves',
|
||||
];
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def represent_cost_matrix(
    Costs: np.ndarray, # NDArray[(Any, Any), int],
    path: List[Tuple[int, int]],
    X: str,
    Y: str,
    verb: VerboseMode,
    pad: bool = False,
) -> np.ndarray: # NDArray[(Any, Any), Any]:
    '''
    Builds an object array containing the cost matrix framed by the words and their indexes.

    @inputs
    - `Costs` - cost matrix
    - `path` - coordinates of the optimal path
    - `X`, `Y` - strings, displayed vertically resp. horizontally
    - `verb` - selects the content of the inner cells
    - `pad` - if true, 3 rows are appended at the bottom and 1 column on the right

    @returns
    - the table; rows 0-2 and columns 0-2 hold the headers.
    '''
    m = len(X); # displayed vertically
    n = len(Y); # displayed horizontally
    rows = slice(3, 3 + m);
    cols = slice(3, 3 + n);

    # create the array of strings:
    shape = (3 + m + 3, 3 + n + 1) if pad else (3 + m, 3 + n);
    table = np.full(shape=shape, dtype=object, fill_value='');

    # topmost rows: indexes, letters of Y, rule
    table[0, cols] = list(map(str, range(n)));
    table[1, cols] = list(Y);
    table[2, cols] = '--';
    # leftmost columns: indexes, letters of X, rule
    table[rows, 0] = list(map(str, range(m)));
    table[rows, 1] = list(X);
    table[rows, 2] = '|';

    # closing rules below and to the right of the matrix:
    if pad:
        table[-3, cols] = '--';
        table[rows, -1] = '|';

    if verb == VerboseMode.MOVES:
        table[rows, cols] = '.';
        for (i, j) in path:
            table[3 + i, 3 + j] = '*';
    elif verb == VerboseMode.COSTS or verb == VerboseMode.COSTS_AND_MOVES:
        table[rows, cols] = Costs.copy();
        if verb == VerboseMode.COSTS_AND_MOVES:
            # highlight the cells on the path by ANSI escape codes:
            for (i, j) in path:
                cell = table[3 + i, 3 + j];
                table[3 + i, 3 + j] = f'\x1b[31;4;1m{cell}\x1b[0m';

    return table;
|
||||
|
||||
def display_cost_matrix(
    Costs: np.ndarray, # NDArray[(Any, Any), int],
    path: List[Tuple[int, int]],
    X: str,
    Y: str,
    verb: VerboseMode,
) -> str:
    '''
    Displays the cost matrix and the optimal path.

    @inputs
    - `Costs` - cost matrix
    - `path` - coordinates of the optimal path
    - `X`, `Y` - strings
    - `verb` - selects the content of the cells

    @returns
    - a 'printable' representation of the matrix with the strings X, Y + indexes.
    '''
    cells = represent_cost_matrix(Costs=Costs, path=path, X=X, Y=Y, verb=verb);
    # a pandas dataframe + tabulate give a nicer layout:
    frame = pd.DataFrame(cells);
    return tabulate(frame, showindex=False, stralign='center', tablefmt='plain');
|
||||
|
||||
def display_cost_matrix_halves(
    Costs1: np.ndarray, # NDArray[(Any, Any), int],
    Costs2: np.ndarray, # NDArray[(Any, Any), int],
    path1: List[Tuple[int, int]],
    path2: List[Tuple[int, int]],
    X1: str,
    X2: str,
    Y1: str,
    Y2: str,
    verb: VerboseMode,
) -> str:
    '''
    Displays the cost matrices and optimal paths of one step
    of the divide and conquer Hirschberg algorithm.

    @inputs
    - `Costs1`, `Costs2` - cost matrices
    - `path1`, `path2` - coordinates of the optimal paths
    - `X1`, `X2`, `Y1`, `Y2` - strings
    - `verb` - selects the content of the cells

    @returns
    - a 'printable' representation of both matrices side by side with the strings + indexes.
    '''
    left = represent_cost_matrix(Costs=Costs1, path=path1, X=X1, Y=Y1, verb=verb, pad=True);
    right = represent_cost_matrix(Costs=Costs2, path=path2, X=X2, Y=Y2, verb=verb, pad=True);

    # merge the tables: the left one loses its last column,
    # the right one is flipped along both axes:
    halves = [left[:, :-1], right[::-1, ::-1]];
    merged = np.concatenate(halves, axis=1);

    # a pandas dataframe + tabulate give a nicer layout:
    frame = pd.DataFrame(merged);
    return tabulate(frame, showindex=False, stralign='center', tablefmt='plain');
|
127
code/python/src/hirschberg/matrix.py
Normal file
127
code/python/src/hirschberg/matrix.py
Normal file
@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from src.thirdparty.types import *;
|
||||
from src.thirdparty.maths import *;
|
||||
|
||||
from src.hirschberg.constants import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'compute_cost_matrix',
|
||||
'update_cost_matrix',
|
||||
];
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS cost matrix + optimal paths
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def compute_cost_matrix(
    X: str,
    Y: str,
) -> Tuple[np.ndarray, np.ndarray]: # Tuple[NDArray[(Any, Any), int], NDArray[(Any, Any), Directions]]:
    '''
    Computes the Hirschberg cost matrix (without recursion).

    Assumptions:
    - X[0] = gap
    - Y[0] = gap

    @returns
    - the pair `(Costs, Moves)`, both of shape `(len(X), len(Y))`.
    '''
    shape = (len(X), len(Y)); # X is displayed vertically, Y horizontally
    Costs = np.full(shape=shape, dtype=int, fill_value=0);
    Moves = np.full(shape=shape, dtype=Directions, fill_value=Directions.UNSET);

    # first fill column 0 ...
    for i, x in enumerate(X[1:], start=1):
        update_cost_matrix(Costs, Moves, x, '', i, 0);

    # ... and row 0:
    for j, y in enumerate(Y[1:], start=1):
        update_cost_matrix(Costs, Moves, '', y, 0, j);

    # now determine all 'inner' values row by row:
    for i, x in enumerate(X[1:], start=1):
        for j, y in enumerate(Y[1:], start=1):
            update_cost_matrix(Costs, Moves, x, y, i, j);

    return Costs, Moves;
|
||||
|
||||
def update_cost_matrix(
    Costs: np.ndarray, # NDArray[(Any, Any), int],
    Moves: np.ndarray, # NDArray[(Any, Any), Directions],
    x: str,
    y: str,
    i: int,
    j: int,
):
    '''
    Stepwise function which updates the entry `(i,j)` of the cost matrix.

    Assumption:
    - all 'predecessors' of `(i,j)` in the matrix are already optimised.

    @inputs
    - `Costs` - cost matrix computed so far (modified in place)
    - `Moves` - optimal steps computed so far (modified in place)
    - `i`, `x` - position and value in string `X` (displayed 'vertically')
    - `j`, `y` - position and value in string `Y` (displayed 'horizontally')
    '''
    # the origin has no predecessor; its cost is 0:
    if (i, j) == (0, 0):
        Costs[0, 0] = 0;
        return;

    ################################
    # NOTE: the possible moves are computed as follows.
    #
    # Case 1: (i-1,j-1) ---> (i,j)
    # ==> the string comparison changes as follows:
    #     s1          s1 x
    #    ----  --->  ------
    #     s2          s2 y
    #
    # Case 2: (i,j-1) ---> (i,j)
    # ==> the string comparison changes as follows:
    #     s1          s1 GAP
    #    ----  --->  -------
    #     s2          s2  y
    #
    # Case 3: (i-1,j) ---> (i,j)
    # ==> the string comparison changes as follows:
    #     s1          s1  x
    #    ----  --->  -------
    #     s2          s2 GAP
    #
    # These cases are taken into account:
    ################################
    has_row_above = i > 0;
    has_col_left = j > 0;
    candidates = [];
    if has_row_above and has_col_left:
        candidates.append((Directions.DIAGONAL, Costs[i-1, j-1] + missmatch_penalty(x, y)));
    if has_col_left:
        candidates.append((Directions.HORIZONTAL, Costs[i, j-1] + gap_penalty(y)));
    if has_row_above:
        candidates.append((Directions.VERTICAL, Costs[i-1, j] + gap_penalty(x)));

    if candidates:
        # minimal cost wins; ties are broken by the priority fixed in the enum:
        best = min(candidates, key=lambda edge: (edge[1], edge[0].value));
        Moves[i, j], Costs[i, j] = best;
    return;
|
125
code/python/src/hirschberg/paths.py
Normal file
125
code/python/src/hirschberg/paths.py
Normal file
@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from src.thirdparty.types import *;
|
||||
from src.thirdparty.maths import *;
|
||||
|
||||
from src.hirschberg.constants import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'get_optimal_transition',
|
||||
'reconstruct_optimal_path',
|
||||
'reconstruct_optimal_path_halves',
|
||||
'reconstruct_words',
|
||||
];
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS optimaler treffpunkt
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def get_optimal_transition(
    Costs1: np.ndarray, # NDArray[(Any, Any), int],
    Costs2: np.ndarray, # NDArray[(Any, Any), int],
) -> Tuple[Tuple[int, int], Tuple[int, int]]:
    '''
    Reconstructs the 'meeting point' at which the total cost is minimal.
    This point is an optimal transition for the recursion step.

    Row `i` of the last column of `Costs1` is paired with row `m-1-i`
    of the last column of `Costs2`; the first pairing with minimal sum is chosen.

    @returns
    - the coordinates of the meeting point in `Costs1` and in `Costs2`.
    '''
    _, n1 = Costs1.shape;
    m, n2 = Costs2.shape;
    last1 = n1 - 1;
    last2 = n2 - 1;

    def total_cost(i: int):
        return Costs1[i, last1] + Costs2[m-1-i, last2];

    # `min` returns the first row with minimal total cost:
    row = min(range(m), key=total_cost);
    return (row, last1), (m-1-row, last2);
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS reconstruction von words/paths
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def reconstruct_optimal_path(
    Moves: np.ndarray, # NDArray[(Any, Any), Directions],
    coord: Optional[Tuple[int, int]] = None,
) -> List[Tuple[int, int]]:
    '''
    Reads the optimal path from the matrix of optimal steps,
    starting at the end coordinates.

    @inputs
    - `Moves` - matrix of optimal steps
    - `coord` - end coordinates; defaults to the bottom right corner of `Moves`

    @returns
    - the coordinates of the path, the end coordinates being the last entry.
    '''
    if coord is None:
        rows, cols = Moves.shape;
        coord = (rows - 1, cols - 1);
    (i, j) = coord;

    # the path is collected backwards and reversed at the end:
    backwards = [(i, j)];
    while (i, j) != (0, 0):
        move = Moves[i, j];
        if move == Directions.DIAGONAL:
            i, j = i - 1, j - 1;
        elif move == Directions.HORIZONTAL:
            j = j - 1;
        elif move == Directions.VERTICAL:
            i = i - 1;
        else:
            # no usable direction stored in this cell - stop here:
            break;
        backwards.append((i, j));

    backwards.reverse();
    return backwards;
|
||||
|
||||
def reconstruct_optimal_path_halves(
    Costs1: np.ndarray, # NDArray[(Any, Any), int],
    Costs2: np.ndarray, # NDArray[(Any, Any), int],
    Moves1: np.ndarray, # NDArray[(Any, Any), Directions],
    Moves2: np.ndarray, # NDArray[(Any, Any), Directions],
) -> Tuple[List[Tuple[int, int]], List[Tuple[int, int]]]:
    '''
    Reconstructs the optimal paths for the recursion step,
    in which the horizontal word is split in two.

    @returns
    - the paths through the first and the second half,
      each of which ends at the optimal transition.
    '''
    end1, end2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2);
    first_half = reconstruct_optimal_path(Moves1, coord=end1);
    second_half = reconstruct_optimal_path(Moves2, coord=end2);
    return first_half, second_half;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS reconstruction von words
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def reconstruct_words(
    X: str,
    Y: str,
    moves: List[Directions],
    path: List[Tuple[int, int]],
) -> Tuple[str, str]:
    '''
    Computes the string alignment from the path.

    @inputs
    - `X`, `Y` - strings, indexed by the coordinates of the path
    - `moves` - the step by which each coordinate of the path is reached
    - `path` - coordinates of the path

    @returns
    - the aligned words; gaps are written as `-`.
    '''
    letters_x = [];
    letters_y = [];
    for ((i, j), move) in zip(path, moves):
        x = X[i];
        y = Y[j];
        if move == Directions.DIAGONAL:
            pair = (x, y);
        elif move == Directions.HORIZONTAL:
            pair = ('-', y);
        elif move == Directions.VERTICAL:
            pair = (x, '-');
        else:
            # any other value contributes nothing to the words:
            continue;
        letters_x.append(pair[0]);
        letters_y.append(pair[1]);
    return ''.join(letters_x), ''.join(letters_y);
|
107
code/python/src/hirschberg/types.py
Normal file
107
code/python/src/hirschberg/types.py
Normal file
@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from __future__ import annotations;
|
||||
|
||||
from src.thirdparty.types import *;
|
||||
from src.thirdparty.maths import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'Alignment',
|
||||
'AlignmentBasic',
|
||||
'AlignmentPair',
|
||||
];
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# Class Alignments
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
class Alignment():
|
||||
@property
|
||||
def parts1(self) -> List[str]:
|
||||
if isinstance(self, AlignmentBasic):
|
||||
return [self.word1];
|
||||
elif isinstance(self, AlignmentPair):
|
||||
return self.left.parts1 + self.right.parts1;
|
||||
return [];
|
||||
|
||||
@property
|
||||
def parts2(self) -> List[str]:
|
||||
if isinstance(self, AlignmentBasic):
|
||||
return [self.word2];
|
||||
elif isinstance(self, AlignmentPair):
|
||||
return self.left.parts2 + self.right.parts2;
|
||||
return [];
|
||||
|
||||
def astree(
|
||||
self,
|
||||
indent: str = ' ',
|
||||
prefix: str = '',
|
||||
braces: bool = False,
|
||||
branch: str = '|____ ',
|
||||
) -> str:
|
||||
return '\n'.join(list(self._astree_recursion(indent=indent, prefix=prefix, braces=braces, branch=branch)));
|
||||
|
||||
def _astree_recursion(
|
||||
self,
|
||||
depth: int = 0,
|
||||
indent: str = ' ',
|
||||
prefix: str = '',
|
||||
braces: bool = False,
|
||||
branch: str = '|____ ',
|
||||
) -> Generator[str, None, None]:
|
||||
word1 = self.as_string1(braces=braces);
|
||||
word2 = self.as_string2(braces=braces);
|
||||
if isinstance(self, AlignmentBasic):
|
||||
u = prefix + branch if depth > 0 else prefix;
|
||||
yield f'{u}{word2}';
|
||||
yield f'{" "*len(u)}{word1}';
|
||||
elif isinstance(self, AlignmentPair):
|
||||
u = prefix + branch if depth > 0 else prefix;
|
||||
yield f'{u}{word2}';
|
||||
yield f'{" "*len(u)}{word1}';
|
||||
yield '';
|
||||
yield from self.left._astree_recursion(
|
||||
depth = depth + 1,
|
||||
indent = indent,
|
||||
prefix = indent + prefix,
|
||||
braces = braces,
|
||||
branch = branch,
|
||||
);
|
||||
yield '';
|
||||
yield from self.right._astree_recursion(
|
||||
depth = depth + 1,
|
||||
indent = indent,
|
||||
prefix = indent + prefix,
|
||||
braces = braces,
|
||||
branch = branch,
|
||||
);
|
||||
return;
|
||||
|
||||
def as_string1(self, braces: bool = False) -> Tuple[str, str]:
|
||||
if braces:
|
||||
return f'({")(".join(self.parts1)})';
|
||||
return ''.join(self.parts1);
|
||||
|
||||
def as_string2(self, braces: bool = False,) -> Tuple[str, str]:
|
||||
if braces:
|
||||
return f'({")(".join(self.parts2)})';
|
||||
return ''.join(self.parts2);
|
||||
|
||||
@dataclass
|
||||
class AlignmentBasic(Alignment):
|
||||
word1: str = field();
|
||||
word2: str = field();
|
||||
|
||||
@dataclass
|
||||
class AlignmentPair(Alignment):
|
||||
left: Alignment = field();
|
||||
right: Alignment = field();
|
@ -5,7 +5,7 @@
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from src.local.typing import *;
|
||||
from src.thirdparty.types import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
|
@ -1,453 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from __future__ import annotations;
|
||||
from src.local.typing import *;
|
||||
from src.local.maths import *;
|
||||
|
||||
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'hirschberg_algorithm',
|
||||
'hirschberg_algorithm_once',
|
||||
'DisplayMode'
|
||||
];
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# CONSTANTS / SETUP
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
class DisplayMode(Enum):
    '''
    Display mode of the verbose output.

    NOTE(review): presumably COSTS / MOVES / COSTS_AND_MOVES select what the
    printed matrix shows - the display code is not visible here, confirm.
    '''
    # value which switches the verbose output off
    NONE = -1;
    COSTS = 0;
    MOVES = 1;
    COSTS_AND_MOVES = 2;
|
||||
|
||||
class Directions(Enum):
    '''
    Direction of a step through the cost matrix.
    '''
    UNSET = -1;
    # set the priorities here
    DIAGONAL = 0;
    HORIZONTAL = 1;
    VERTICAL = 2;
|
||||
|
||||
def gap_penalty(x: str) -> int:
    '''
    Penalty for a gap; the constant 1, whatever the character `x` is.
    '''
    return 1;
|
||||
|
||||
def missmatch_penalty(x: str, y: str):
    '''
    Penalty for pairing the characters `x` and `y`:
    1 if they differ, otherwise 0.
    '''
    if x == y:
        return 0;
    return 1;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHOD hirschberg_algorithm
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def hirschberg_algorithm_once(
    X: str,
    Y: str,
    mode: DisplayMode = DisplayMode.NONE,
) -> Tuple[str, str]:
    '''
    Computes the cost matrix once and derives an alignment of `X` and `Y` from it.

    @inputs
    - `X`, `Y` - the two words to be aligned
    - `mode` - if not `DisplayMode.NONE`, the cost matrix and the alignment are printed

    @returns
    - the pair `(word_x, word_y)` of aligned words.
    '''
    # both words are prefixed by a gap symbol:
    padded_x = '-' + X;
    padded_y = '-' + Y;

    Costs, Moves = compute_cost_matrix(X=padded_x, Y=padded_y);
    path = reconstruct_optimal_path(Moves=Moves);
    moves = [Moves[coord] for coord in path];
    word_x, word_y = reconstruct_words(X=padded_x, Y=padded_y, moves=moves, path=path);

    if mode != DisplayMode.NONE:
        matrix_repr = display_cost_matrix(Costs=Costs, path=path, X=padded_x, Y=padded_y, mode=mode);
        report = [
            f'\n{matrix_repr}',
            f'\n\x1b[1mOptimales Alignment:\x1b[0m',
            '',
            word_y,
            len(word_x) * '-',
            word_x,
            '',
        ];
        # one `print` call per line, in this order:
        for line in report:
            print(line);
    return word_x, word_y;
|
||||
|
||||
def hirschberg_algorithm(
    X: str,
    Y: str,
    mode: DisplayMode = DisplayMode.NONE,
) -> Tuple[str, str]:
    '''
    Aligns `X` and `Y` by means of the recursive step `hirschberg_algorithm_step`
    and joins the parts of the alignment it returns.

    @inputs
    - `X`, `Y` - the two words to be aligned
    - `mode` - if not `DisplayMode.NONE`, the alignment is printed
      with every part enclosed in `[...]`

    @returns
    - the pair `(word_x, word_y)` of aligned words.
    '''
    parts_x, parts_y = hirschberg_algorithm_step(X=X, Y=Y, depth=1, mode=mode);
    result = (''.join(parts_x), ''.join(parts_y));

    if mode != DisplayMode.NONE:
        bracketed_x = f'[{"][".join(parts_x)}]';
        bracketed_y = f'[{"][".join(parts_y)}]';
        report = [
            f'\n\x1b[1mOptimales Alignment:\x1b[0m',
            '',
            bracketed_y,
            len(bracketed_x) * '-',
            bracketed_x,
            '',
        ];
        # one `print` call per line, in this order:
        for line in report:
            print(line);
    return result;
|
||||
|
||||
def hirschberg_algorithm_step(
    X: str,
    Y: str,
    depth: int = 0,
    mode: DisplayMode = DisplayMode.NONE,
) -> Tuple[List[str], List[str]]:
    '''
    One divide-and-conquer step of the Hirschberg algorithm.

    The horizontal word `Y` is cut in half. A cost matrix is computed for
    the left half against `X` and for the reversed right half against the
    reversed `X`. The row where the combined costs are minimal determines
    where `X` is cut, and both sub-problems are solved recursively.

    @inputs
    - `X` - word displayed vertically
    - `Y` - word displayed horizontally (this is the word being halved)
    - `depth` - current recursion depth, only used for the printed output
    - `mode` - unless this is `DisplayMode.NONE`, the two half matrices
      are printed at every recursion step

    @returns
    - two lists holding the partial alignments of `X` and of `Y`;
      joining each list yields the complete aligned word.
    '''
    n = len(Y)

    # Base case: at most one letter left in Y, solve directly with a full
    # matrix. `n == 0` is included so that an empty Y terminates here
    # instead of being halved forever.
    if n <= 1:
        gapped_x = '-' + X
        gapped_y = '-' + Y
        _, Moves = compute_cost_matrix(X=gapped_x, Y=gapped_y)
        path = reconstruct_optimal_path(Moves=Moves)
        word_x, word_y = reconstruct_words(
            X=gapped_x,
            Y=gapped_y,
            moves=[Moves[coord] for coord in path],
            path=path,
        )
        return [word_x], [word_y]

    # Split position: the left half receives the extra letter when n is odd.
    n = (n + 1) // 2
    Y_left = Y[:n]
    Y_right = Y[n:]

    # Left half of the horizontal word against the whole vertical word:
    X1 = X
    Y1 = Y_left
    # Right half of the horizontal word, with both words reversed:
    X2 = X[::-1]
    Y2 = Y_right[::-1]

    # Solve both sub-problems:
    Costs1, Moves1 = compute_cost_matrix(X='-' + X1, Y='-' + Y1)
    Costs2, Moves2 = compute_cost_matrix(X='-' + X2, Y='-' + Y2)

    if mode != DisplayMode.NONE:
        path1, path2 = reconstruct_optimal_path_halves(
            Costs1=Costs1,
            Costs2=Costs2,
            Moves1=Moves1,
            Moves2=Moves2,
        )
        table_repr = display_cost_matrix_halves(
            Costs1=Costs1,
            Costs2=Costs2,
            path1=path1,
            path2=path2,
            X1='-' + X1,
            X2='-' + X2,
            Y1='-' + Y1,
            Y2='-' + Y2,
            mode=mode,
        )
        print(f'\n\x1b[1mRekursionstiefe: {depth}\x1b[0m\n\n{table_repr}')

    # Coordinates of the optimal transition between the two halves.
    # Row 0 of the matrix is the gap prefix, so the row index equals the
    # number of letters of X that belong to the left sub-problem.
    coord1, _ = get_optimal_transition(Costs1=Costs1, Costs2=Costs2)
    p = coord1[0]

    # Divide and conquer:
    alignments_x_1, alignments_y_1 = hirschberg_algorithm_step(X=X[:p], Y=Y_left, depth=depth + 1, mode=mode)
    alignments_x_2, alignments_y_2 = hirschberg_algorithm_step(X=X[p:], Y=Y_right, depth=depth + 1, mode=mode)

    # Put the results together:
    alignments_x = alignments_x_1 + alignments_x_2
    alignments_y = alignments_y_1 + alignments_y_2
    if len(Y_left) <= 1 and len(Y_right) <= 1:
        # If both halves consist of at most one letter, the alignment
        # counts as a single part ---> merge:
        alignments_x = [''.join(alignments_x)]
        alignments_y = [''.join(alignments_y)]
    return alignments_x, alignments_y
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS cost matrix + optimal paths
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def compute_cost_matrix(
    X: str,
    Y: str,
) -> Tuple[NDArray[(Any, Any), int], NDArray[(Any, Any), Directions]]:
    '''
    Computes the Hirschberg cost matrix (without recursion).

    Assumptions:
    - X[0] = gap
    - Y[0] = gap

    @returns
    - `Costs` - matrix of shape `(len(X), len(Y))` holding the costs
    - `Moves` - matrix of the same shape holding the chosen move per cell
      (`Directions.UNSET` for cells that were never updated)
    '''
    rows = len(X)  # displayed vertically
    cols = len(Y)  # displayed horizontally
    Costs = np.zeros((rows, cols), dtype=int)
    Moves = np.full((rows, cols), Directions.UNSET, dtype=Directions)

    # First fill column 0 and row 0 ...
    for i, x in enumerate(X[1:], start=1):
        update_cost_matrix(Costs, Moves, x, '', i, 0)
    for j, y in enumerate(Y[1:], start=1):
        update_cost_matrix(Costs, Moves, '', y, 0, j)

    # ... then determine all "inner" values row by row:
    for i, x in enumerate(X[1:], start=1):
        for j, y in enumerate(Y[1:], start=1):
            update_cost_matrix(Costs, Moves, x, y, i, j)

    return Costs, Moves
|
||||
|
||||
def update_cost_matrix(
    Costs: NDArray[(Any, Any), int],
    Moves: NDArray[(Any, Any), Directions],
    x: str,
    y: str,
    i: int,
    j: int,
):
    '''
    Single-step update of the entry `(i,j)` of the cost matrix.

    Assumption:
    - all "predecessors" of `(i,j)` in the matrix are already optimised.

    @inputs
    - `Costs` - cost matrix computed so far (modified in place)
    - `Moves` - optimal moves computed so far (modified in place)
    - `i`, `x` - position and value in string `X` (displayed "vertically")
    - `j`, `y` - position and value in string `Y` (displayed "horizontally")
    '''
    # The origin has no predecessor and always costs nothing:
    if (i, j) == (0, 0):
        Costs[0, 0] = 0
        return

    ################################
    # NOTE: The candidate moves are derived as follows.
    #
    # Case 1: (i-1,j-1) ---> (i,j)
    #   ==> the string comparison changes like this:
    #       s1            s1 x
    #      ----   --->   ------
    #       s2            s2 y
    #
    # Case 2: (i,j-1) ---> (i,j)
    #   ==> the string comparison changes like this:
    #       s1            s1 GAP
    #      ----   --->   -------
    #       s2            s2 y
    #
    # Case 3: (i-1,j) ---> (i,j)
    #   ==> the string comparison changes like this:
    #       s1            s1 x
    #      ----   --->   -------
    #       s2            s2 GAP
    ################################
    candidates = []
    if i > 0 and j > 0:
        diagonal_cost = Costs[i-1, j-1] + missmatch_penalty(x, y)
        candidates.append((Directions.DIAGONAL, diagonal_cost))
    if j > 0:
        horizontal_cost = Costs[i, j-1] + gap_penalty(y)
        candidates.append((Directions.HORIZONTAL, horizontal_cost))
    if i > 0:
        vertical_cost = Costs[i-1, j] + gap_penalty(x)
        candidates.append((Directions.VERTICAL, vertical_cost))

    if not candidates:
        return

    # Cheapest move wins; ties are broken by the priority set in the enum.
    move, cost = min(candidates, key=lambda edge: (edge[1], edge[0].value))
    Moves[i, j] = move
    Costs[i, j] = cost
    return
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS optimal meeting point
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def get_optimal_transition(
    Costs1: NDArray[(Any, Any), int],
    Costs2: NDArray[(Any, Any), int],
) -> Tuple[Tuple[int, int], Tuple[int, int]]:
    '''
    Finds the "meeting point" at which the total costs are minimal.
    This point is an optimal transition for the recursion step.

    Row `i` in the last column of `Costs1` is paired with row `m-1-i` in
    the last column of `Costs2`. Of all rows with minimal summed cost the
    first one is chosen.

    @returns
    - the coordinates of the meeting point in `Costs1` and in `Costs2`.
    '''
    _, n1 = Costs1.shape
    m, n2 = Costs2.shape
    last1 = n1 - 1
    last2 = n2 - 1

    def total_cost(i: int):
        return Costs1[i, last1] + Costs2[m - 1 - i, last2]

    # min() returns the first minimal element, i.e. the smallest such row.
    best = min(range(m), key=total_cost)
    return (best, last1), (m - 1 - best, last2)
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# METHODS reconstruction of words/paths
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def reconstruct_optimal_path(
    Moves: NDArray[(Any, Any), Directions],
    coord: Optional[Tuple[int, int]] = None,
) -> List[Tuple[int, int]]:
    '''
    Reads the optimal path out of the matrix of optimal moves,
    walking backwards from the end coordinates.

    @inputs
    - `Moves` - matrix of optimal moves
    - `coord` - end coordinates; defaults to the bottom-right cell

    @returns
    - the visited coordinates in forward order. The walk stops at `(0, 0)`
      or at the first cell whose move is not one of the three directions.
    '''
    if coord is None:
        rows, cols = Moves.shape
        i, j = rows - 1, cols - 1
    else:
        i, j = coord

    # Offset to the predecessor cell for each kind of move.
    predecessor = {
        Directions.DIAGONAL: (-1, -1),
        Directions.HORIZONTAL: (0, -1),
        Directions.VERTICAL: (-1, 0),
    }

    backwards = [(i, j)]
    while (i, j) != (0, 0):
        offset = predecessor.get(Moves[i, j])
        if offset is None:
            # no usable move stored here (e.g. UNSET) ---> stop walking
            break
        i, j = i + offset[0], j + offset[1]
        backwards.append((i, j))

    backwards.reverse()
    return backwards
|
||||
|
||||
def reconstruct_optimal_path_halves(
    Costs1: NDArray[(Any, Any), int],
    Costs2: NDArray[(Any, Any), int],
    Moves1: NDArray[(Any, Any), Directions],
    Moves2: NDArray[(Any, Any), Directions],
) -> Tuple[List[Tuple[int, int]], List[Tuple[int, int]]]:
    '''
    Reconstructs the optimal paths for a recursion step,
    in which the horizontal word is split in two.

    @returns
    - one path per half, each ending at the optimal transition point
      of its own matrix.
    '''
    end1, end2 = get_optimal_transition(Costs1=Costs1, Costs2=Costs2)
    return (
        reconstruct_optimal_path(Moves1, coord=end1),
        reconstruct_optimal_path(Moves2, coord=end2),
    )
|
||||
|
||||
def reconstruct_words(
    X: str,
    Y: str,
    moves: List[Directions],
    path: List[Tuple[int, int]],
) -> Tuple[str, str]:
    '''
    Builds the two aligned words from a path through the matrix.

    @inputs
    - `X`, `Y` - the words, indexed by the path coordinates
    - `moves` - the move stored at each coordinate of `path`
    - `path` - coordinates `(i, j)` in forward order

    @returns
    - the aligned versions of `X` and `Y`. A horizontal move puts a gap
      into the first word, a vertical move puts a gap into the second;
      any other move (e.g. `UNSET`) contributes nothing.
    '''
    letters_x = []
    letters_y = []
    for (i, j), move in zip(path, moves):
        x, y = X[i], Y[j]
        if move == Directions.DIAGONAL:
            letters_x.append(x)
            letters_y.append(y)
        elif move == Directions.HORIZONTAL:
            letters_x.append('-')
            letters_y.append(y)
        elif move == Directions.VERTICAL:
            letters_x.append(x)
            letters_y.append('-')
    return ''.join(letters_x), ''.join(letters_y)
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# AUXILIARY METHODS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
def represent_cost_matrix(
    Costs: NDArray[(Any, Any), int],
    path: List[Tuple[int, int]],
    X: str,
    Y: str,
    mode: DisplayMode,
    pad: bool = False,
) -> NDArray[(Any, Any), Any]:
    '''
    Builds a table (object array) showing the cost matrix with headers.

    The first three rows hold the column indexes, the letters of `Y` and a
    separator; the first three columns hold the row indexes, the letters
    of `X` and a separator.

    @inputs
    - `Costs` - cost matrix
    - `path` - coordinates of the optimal path
    - `X`, `Y` - strings (vertical resp. horizontal)
    - `mode` - `MOVES` marks the path with `*` on a grid of `.`;
      `COSTS` shows the costs; `COSTS_AND_MOVES` shows the costs and
      wraps the entries on the path in braces
    - `pad` - if set, three extra rows and one extra column are appended,
      with a closing separator row and separator column

    @returns
    - the table as an array of objects.
    '''
    m = len(X)  # displayed vertically
    n = len(Y)  # displayed horizontally

    # create the array of strings:
    extra_rows, extra_cols = (3, 1) if pad else (0, 0)
    table = np.full((3 + m + extra_rows, 3 + n + extra_cols), '', dtype=object)

    body_rows = slice(3, 3 + m)
    body_cols = slice(3, 3 + n)

    # topmost rows:
    table[0, body_cols] = [str(j) for j in range(n)]
    table[1, body_cols] = list(Y)
    table[2, body_cols] = '--'
    # leftmost columns:
    table[body_rows, 0] = [str(i) for i in range(m)]
    table[body_rows, 1] = list(X)
    table[body_rows, 2] = '|'

    if pad:
        # closing separators below and to the right of the body:
        table[-3, body_cols] = '--'
        table[body_rows, -1] = '|'

    if mode == DisplayMode.MOVES:
        table[body_rows, body_cols] = '.'
        for (i, j) in path:
            table[3 + i, 3 + j] = '*'
    elif mode in (DisplayMode.COSTS, DisplayMode.COSTS_AND_MOVES):
        table[body_rows, body_cols] = Costs.copy()
        if mode == DisplayMode.COSTS_AND_MOVES:
            for (i, j) in path:
                cell = table[3 + i, 3 + j]
                table[3 + i, 3 + j] = f'{{{cell}}}'

    return table
|
||||
|
||||
def display_cost_matrix(
    Costs: NDArray[(Any, Any), int],
    path: List[Tuple[int, int]],
    X: str,
    Y: str,
    mode: DisplayMode,
) -> str:
    '''
    Shows the cost matrix + optimal path.

    @inputs
    - `Costs` - cost matrix
    - `path` - coordinates of the optimal path
    - `X`, `Y` - strings
    - `mode` - selects what is shown in the cells

    @returns
    - a 'printable' representation of the matrix with the strings X, Y + indexes.
    '''
    table = represent_cost_matrix(Costs=Costs, path=path, X=X, Y=Y, mode=mode)
    # use a pandas DataFrame + tabulate for a nicer layout:
    frame = pd.DataFrame(table)
    return tabulate(frame, showindex=False, stralign='center', tablefmt='plain')
|
||||
|
||||
def display_cost_matrix_halves(
    Costs1: NDArray[(Any, Any), int],
    Costs2: NDArray[(Any, Any), int],
    path1: List[Tuple[int, int]],
    path2: List[Tuple[int, int]],
    X1: str,
    X2: str,
    Y1: str,
    Y2: str,
    mode: DisplayMode,
) -> str:
    '''
    Shows the cost matrices + optimal paths for one step of the
    divide-and-conquer Hirschberg algorithm.

    @inputs
    - `Costs1`, `Costs2` - cost matrices
    - `path1`, `path2` - coordinates of the optimal paths
    - `X1`, `X2`, `Y1`, `Y2` - strings
    - `mode` - selects what is shown in the cells

    @returns
    - a 'printable' representation of both matrices side by side,
      with the strings + indexes.
    '''
    left = represent_cost_matrix(Costs=Costs1, path=path1, X=X1, Y=Y1, mode=mode, pad=True)
    right = represent_cost_matrix(Costs=Costs2, path=path2, X=X2, Y=Y2, mode=mode, pad=True)

    # merge the tables: the left one loses its last (padding) column,
    # the right one is flipped along both axes.
    merged = np.concatenate((left[:, :-1], right[::-1, ::-1]), axis=1)

    # use a pandas DataFrame + tabulate for a nicer layout:
    frame = pd.DataFrame(merged)
    return tabulate(frame, showindex=False, stralign='center', tablefmt='plain')
|
@ -5,6 +5,8 @@
|
||||
# IMPORTS
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from dataclasses import dataclass;
|
||||
from dataclasses import field;
|
||||
from enum import Enum;
|
||||
from types import TracebackType;
|
||||
from typing import Any;
|
||||
@ -25,6 +27,8 @@ from nptyping import NDArray;
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
__all__ = [
|
||||
'dataclass',
|
||||
'field',
|
||||
'Enum',
|
||||
'TracebackType',
|
||||
'Any',
|
@ -6,8 +6,8 @@
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
from __future__ import annotations;
|
||||
from src.local.typing import *;
|
||||
from src.local.maths import *;
|
||||
from src.thirdparty.types import *;
|
||||
from src.thirdparty.maths import *;
|
||||
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# EXPORTS
|
||||
|
@ -12,7 +12,7 @@ from pytest import lazy_fixture;
|
||||
from unittest import TestCase;
|
||||
from unittest.mock import patch;
|
||||
|
||||
from src.local.typing import *;
|
||||
from src.thirdparty.types import *;
|
||||
from src.graphs.graph import *;
|
||||
from src.graphs.tarjan import *;
|
||||
|
Loading…
x
Reference in New Issue
Block a user