[Yum-devel] [PATCH] Improve relevance ordering of search results.
Seth Vidal
skvidal at fedoraproject.org
Mon Oct 26 20:07:08 UTC 2009
On Mon, 26 Oct 2009, Ville Skyttä wrote:
> Weight the search criteria in descending order of relevance, based on
> the order in which they are given, and also take into account the number
> of fields hit by each combination. Also preserve the order of the given
> criteria in return values and console display.
> ---
> cli.py | 2 +-
> yum/__init__.py | 19 +++++++++++++------
> 2 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/cli.py b/cli.py
> index 5eed6c4..2d9c023 100644
> --- a/cli.py
> +++ b/cli.py
> @@ -803,7 +803,7 @@ class YumBaseCli(yum.YumBase, output.YumOutput):
> if keys != okeys:
> if akeys:
> print ""
> - print self.fmtSection("Matched: " + ", ".join(sorted(keys)))
> + print self.fmtSection("Matched: " + ", ".join(keys))
> okeys = keys
> akeys.update(keys)
> self.matchcallback(po, matched_value, args)
> diff --git a/yum/__init__.py b/yum/__init__.py
> index ff5e485..4212907 100644
> --- a/yum/__init__.py
> +++ b/yum/__init__.py
> @@ -1957,13 +1957,17 @@ class YumBase(depsolve.Depsolve):
> sorted_lists = {}
> tmpres = []
> real_crit = []
> - for s in criteria:
> - real_crit.append(s)
> real_crit_lower = [] # Take the s.lower()'s out of the loop
> rcl2c = {}
> + # weigh terms in given order (earlier = more relevant)
> + critweight = 0
> + critweights = {}
> for s in criteria:
> + real_crit.append(s)
> real_crit_lower.append(s.lower())
> rcl2c[s.lower()] = s
> + critweights.setdefault(s, critweight)
> + critweight -= 1
>
> for sack in self.pkgSack.sacks.values():
> tmpres.extend(sack.searchPrimaryFieldsMultipleStrings(sql_fields, real_crit))
> @@ -1972,7 +1976,7 @@ class YumBase(depsolve.Depsolve):
> for (po, count) in tmpres:
> # check the pkg for sanity
> # pop it into the sorted lists
> - tmpkeys = set()
> + tmpkeys = [] # use list to preserve given criteria order
> tmpvalues = []
> if count not in sorted_lists: sorted_lists[count] = []
> for s in real_crit_lower:
> @@ -1980,7 +1984,8 @@ class YumBase(depsolve.Depsolve):
> value = to_unicode(getattr(po, field))
> if value and value.lower().find(s) != -1:
> tmpvalues.append(value)
> - tmpkeys.add(rcl2c[s])
> + if rcl2c[s] not in tmpkeys:
> + tmpkeys.append(rcl2c[s])
>
> if len(tmpvalues) > 0:
> sorted_lists[count].append((po, tmpkeys, tmpvalues))
> @@ -1998,8 +2003,10 @@ class YumBase(depsolve.Depsolve):
> # By default just sort using package sorting
> sort_func = operator.itemgetter(0)
> if keys:
> - # Take into account the keys found, as well
> - sort_func = lambda x: "%s%s" % ("\0".join(sorted(x[1])), str(x[0]))
> + # Take into account the keys found, their original order,
> + # and number of fields hit as well
> + sort_func = lambda x: (-sum((critweights[y] for y in x[1])),
> + "\0".join(sorted(x[1])), -len(x[2]), x[0])
> yielded = {}
> for val in reversed(sorted(sorted_lists)):
> for (po, ks, vs) in sorted(sorted_lists[val], key=sort_func):
I'm inclined toward ACK. Have you noticed any speed impact?
-sv
More information about the Yum-devel mailing list