rewrite sorting of parts
aaargh, iterables can only be used once!
This commit is contained in:
parent
72821a1203
commit
696a13acac
1 changed files with 53 additions and 26 deletions
|
@ -72,6 +72,32 @@ async def find_parts(parts: list[Part], search_string: str, max_number: int=10)
|
|||
list of parts that somehow include the search string
|
||||
"""
|
||||
fuzzy = re.compile(".*" + ".*".join(search_string) + ".*", flags=re.IGNORECASE)
|
||||
score_categories = ("startswith", "startswithLower", "inLower", "fuzzy")
|
||||
|
||||
def match_score_one_string(string: Optional[str]) -> dict[str, float]:
    """Score a single candidate string against the search string.

    Returns a dict with one entry per name in ``score_categories``. Each
    entry is ``antilen(string)`` when the candidate matches in that category
    and 0 when it does not. A candidate of ``None`` scores 0 in every
    category.
    """
    if string is None:
        return dict.fromkeys(score_categories, 0)

    def rate(matched: bool) -> float:
        # antilen is only evaluated for categories that actually matched.
        return antilen(string) if matched else 0

    result = {"startswith": rate(string.startswith(search_string))}
    candidate_lower = string.lower()
    query_lower = search_string.lower()
    result["startswithLower"] = rate(candidate_lower.startswith(query_lower))
    result["inLower"] = rate(query_lower in candidate_lower)
    result["fuzzy"] = rate(fuzzy.match(string) is not None)
    return result
|
||||
|
||||
def max_scores(scores: tuple[dict[str, float], ...]) -> dict[str, float]:
    """Combine several score dicts by taking the best value per category.

    Every dict in ``scores`` is assumed to contain all keys listed in
    ``score_categories``. With no score dicts at all, every category is 0.
    """
    # Materialize once: the input is walked once per category, so a one-shot
    # iterator would be exhausted after the first category.
    scores = tuple(scores)
    return {
        key: max((score[key] for score in scores), default=0)
        for key in score_categories
    }
|
||||
|
||||
def match_score(part):
|
||||
"""Return sortable tuple of decreasing importance.
|
||||
|
@ -80,33 +106,34 @@ async def find_parts(parts: list[Part], search_string: str, max_number: int=10)
|
|||
"""
|
||||
alt_names = itertools.chain(part.name_alt_de if hasattr(part, "name_alt_de") else [],
|
||||
part.name_alt_en if hasattr(part, "name_alt_en") else [])
|
||||
score_name = match_score_one_string(part.name)
|
||||
score_name_lang = max_scores(tuple(map(match_score_one_string,
|
||||
(part.get("name_en", None),
|
||||
part.get("name_de", None)
|
||||
)
|
||||
)
|
||||
))
|
||||
score_name_alt = max_scores(tuple(map(match_score_one_string, alt_names)))
|
||||
score_description = max_scores(tuple(map(match_score_one_string,
|
||||
(part.get("description_en", None),
|
||||
part.get("description_de", None))
|
||||
)
|
||||
))
|
||||
return (
|
||||
# first prio: name at begin. Prefer short
|
||||
antilen(part.name) if part.name.startswith(search_string) else 0,
|
||||
# second prio: part.name_en and name_de at begin. Prefer short
|
||||
max(1 / len(part.name_en)
|
||||
if hasattr(part, "name_en") and part.name_en.startswith(search_string)
|
||||
else 0,
|
||||
1 / len(part.name_de)
|
||||
if hasattr(part, "name_de") and part.name_de.startswith(search_string)
|
||||
else 0),
|
||||
# third prio: name with all letters appearing with gaps in correct order ("fuzzy match")
|
||||
bool(fuzzy.match(part.name)),
|
||||
# fourth prio: name_en and name_de fuzzy match
|
||||
(hasattr(part, "name_en") and bool(fuzzy.match(part.name_en)))
|
||||
or (hasattr(part, "name_de") and bool(fuzzy.match(part.name_de))),
|
||||
# fifth prio: alternative name at begin
|
||||
max((1 / len(alt_name) for alt_name in alt_names if alt_name.startswith(search_string)),
|
||||
default=0),
|
||||
# sixth prio: alternative name fuzzy match
|
||||
max(map(antilen, filter(fuzzy.match, alt_names)),
|
||||
default=0),
|
||||
# seventh prio: description
|
||||
max(int(hasattr(part, "description_de") and search_string in part.description_de) * 3,
|
||||
int(hasattr(part, "description_de") and bool(fuzzy.match(part.description_de))),
|
||||
int(hasattr(part, "description_en") and search_string in part.description_en) * 3,
|
||||
int(hasattr(part, "description_en") and bool(fuzzy.match(part.description_en)))
|
||||
)
|
||||
score_name['startswith'],
|
||||
score_name_lang['startswith'],
|
||||
score_name['startswithLower'],
|
||||
score_name_lang['startswithLower'],
|
||||
score_name_alt['startswith'],
|
||||
score_name_alt['startswithLower'],
|
||||
score_name['inLower'],
|
||||
score_name_lang['inLower'],
|
||||
score_name_alt['inLower'],
|
||||
score_name['fuzzy'],
|
||||
score_name_lang['fuzzy'],
|
||||
score_name_alt['fuzzy'],
|
||||
score_description['inLower'],
|
||||
score_description['fuzzy']
|
||||
)
|
||||
if search_string:
|
||||
scored_parts = [(part, match_score(part)) for part in parts]
|
||||
|
|
Loading…
Reference in a new issue