rewrite sorting of parts

aaargh, iterables can only be used once!
This commit is contained in:
flukx 2024-08-18 09:34:00 +02:00
parent 72821a1203
commit 696a13acac

View file

@ -72,6 +72,32 @@ async def find_parts(parts: list[Part], search_string: str, max_number: int=10)
list of parts that somehow include the search string
"""
fuzzy = re.compile(".*" + ".*".join(search_string) + ".*", flags=re.IGNORECASE)
score_categories = ("startswith", "startswithLower", "inLower", "fuzzy")
def match_score_one_string(string: Optional[str]) -> dict[str, float]:
    """Score a single candidate string against the enclosing ``search_string``.

    The result is a dict with the keys "startswith", "startswithLower",
    "inLower" and "fuzzy". A category holds ``antilen(string)`` when the
    string matches in that way and 0 otherwise. A ``None`` string scores 0
    in every category listed in ``score_categories``.
    """
    if string is None:
        return {category: 0 for category in score_categories}
    haystack_lower = string.lower()
    needle_lower = search_string.lower()
    # First decide which kinds of match apply ...
    hits = {
        "startswith": string.startswith(search_string),
        "startswithLower": haystack_lower.startswith(needle_lower),
        "inLower": needle_lower in haystack_lower,
        "fuzzy": bool(fuzzy.match(string)),
    }
    # ... then score them; antilen is only evaluated for categories that matched.
    return {
        category: antilen(string) if hit else 0
        for category, hit in hits.items()
    }
def max_scores(scores: tuple[dict[str, float], ...]) -> dict[str, float]:
    """Combine several score dicts by keeping the best value per category.

    Every dict in ``scores`` is expected to contain all keys listed in
    ``score_categories``. When ``scores`` is empty, every category is 0.

    Args:
        scores: the per-string score dicts to merge. Any iterable works;
            it is materialised once before use.

    Returns:
        A dict mapping each entry of ``score_categories`` to the largest
        value found for it.
    """
    # The scores are walked once per category. A one-shot iterator (e.g. a
    # bare ``map``) would be exhausted after the first category, so pin the
    # items down first; an existing tuple passes through unchanged.
    scores = tuple(scores)
    return {
        key: max((score[key] for score in scores), default=0)
        for key in score_categories
    }
def match_score(part):
"""Return sortable tuple of decreasing importance.
@ -80,33 +106,34 @@ async def find_parts(parts: list[Part], search_string: str, max_number: int=10)
"""
alt_names = itertools.chain(part.name_alt_de if hasattr(part, "name_alt_de") else [],
part.name_alt_en if hasattr(part, "name_alt_en") else [])
score_name = match_score_one_string(part.name)
score_name_lang = max_scores(tuple(map(match_score_one_string,
(part.get("name_en", None),
part.get("name_de", None)
)
)
))
score_name_alt = max_scores(tuple(map(match_score_one_string, alt_names)))
score_description = max_scores(tuple(map(match_score_one_string,
(part.get("description_en", None),
part.get("description_de", None))
)
))
return (
# first prio: name at begin. Prefer short
antilen(part.name) if part.name.startswith(search_string) else 0,
# second prio: part.name_en and name_de at begin. Prefer short
max(1 / len(part.name_en)
if hasattr(part, "name_en") and part.name_en.startswith(search_string)
else 0,
1 / len(part.name_de)
if hasattr(part, "name_de") and part.name_de.startswith(search_string)
else 0),
# third prio: name with all letters appearing with gaps in correct order ("fuzzy match")
bool(fuzzy.match(part.name)),
# forth prio: name_en and name_de fuzzy match
(hasattr(part, "name_en") and bool(fuzzy.match(part.name_en)))
or (hasattr(part, "name_de") and bool(fuzzy.match(part.name_de))),
# fith prio: alternative name at begin
max((1 / len(alt_name) for alt_name in alt_names if alt_name.startswith(search_string)),
default=0),
# sixth prio: alternative name fuzzy match
max(map(antilen, filter(fuzzy.match, alt_names)),
default=0),
# seventh prio: description
max(int(hasattr(part, "description_de") and search_string in part.description_de) * 3,
int(hasattr(part, "description_de") and bool(fuzzy.match(part.description_de))),
int(hasattr(part, "description_en") and search_string in part.description_en) * 3,
int(hasattr(part, "description_en") and bool(fuzzy.match(part.description_en)))
)
score_name['startswith'],
score_name_lang['startswith'],
score_name['startswithLower'],
score_name_lang['startswithLower'],
score_name_alt['startswith'],
score_name_alt['startswithLower'],
score_name['inLower'],
score_name_lang['inLower'],
score_name_alt['inLower'],
score_name['fuzzy'],
score_name_lang['fuzzy'],
score_name_alt['fuzzy'],
score_description['inLower'],
score_description['fuzzy']
)
if search_string:
scored_parts = [(part, match_score(part)) for part in parts]