[Yum-devel] [PATCH] Improve relevance ordering of search results.

Seth Vidal skvidal at fedoraproject.org
Mon Oct 26 20:07:08 UTC 2009



On Mon, 26 Oct 2009, Ville Skyttä wrote:

> Weight the search criteria in descending order, based on the order in
> which they are given, and also take into account the number of fields
> hit by each combination. Also preserve the order of the given criteria
> in return values and in the console display.
> ---
> cli.py          |    2 +-
> yum/__init__.py |   19 +++++++++++++------
> 2 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/cli.py b/cli.py
> index 5eed6c4..2d9c023 100644
> --- a/cli.py
> +++ b/cli.py
> @@ -803,7 +803,7 @@ class YumBaseCli(yum.YumBase, output.YumOutput):
>             if keys != okeys:
>                 if akeys:
>                     print ""
> -                print self.fmtSection("Matched: " + ", ".join(sorted(keys)))
> +                print self.fmtSection("Matched: " + ", ".join(keys))
>                 okeys = keys
>                 akeys.update(keys)
>             self.matchcallback(po, matched_value, args)
> diff --git a/yum/__init__.py b/yum/__init__.py
> index ff5e485..4212907 100644
> --- a/yum/__init__.py
> +++ b/yum/__init__.py
> @@ -1957,13 +1957,17 @@ class YumBase(depsolve.Depsolve):
>         sorted_lists = {}
>         tmpres = []
>         real_crit = []
> -        for s in criteria:
> -            real_crit.append(s)
>         real_crit_lower = [] # Take the s.lower()'s out of the loop
>         rcl2c = {}
> +        # weigh terms in given order (earlier = more relevant)
> +        critweight = 0
> +        critweights = {}
>         for s in criteria:
> +            real_crit.append(s)
>             real_crit_lower.append(s.lower())
>             rcl2c[s.lower()] = s
> +            critweights.setdefault(s, critweight)
> +            critweight -= 1
>
>         for sack in self.pkgSack.sacks.values():
>             tmpres.extend(sack.searchPrimaryFieldsMultipleStrings(sql_fields, real_crit))
> @@ -1972,7 +1976,7 @@ class YumBase(depsolve.Depsolve):
>             for (po, count) in tmpres:
>                 # check the pkg for sanity
>                 # pop it into the sorted lists
> -                tmpkeys   = set()
> +                tmpkeys   = [] # use list to preserve given criteria order
>                 tmpvalues = []
>                 if count not in sorted_lists: sorted_lists[count] = []
>                 for s in real_crit_lower:
> @@ -1980,7 +1984,8 @@ class YumBase(depsolve.Depsolve):
>                         value = to_unicode(getattr(po, field))
>                         if value and value.lower().find(s) != -1:
>                             tmpvalues.append(value)
> -                            tmpkeys.add(rcl2c[s])
> +                            if rcl2c[s] not in tmpkeys:
> +                                tmpkeys.append(rcl2c[s])
>
>                 if len(tmpvalues) > 0:
>                     sorted_lists[count].append((po, tmpkeys, tmpvalues))
> @@ -1998,8 +2003,10 @@ class YumBase(depsolve.Depsolve):
>         # By default just sort using package sorting
>         sort_func = operator.itemgetter(0)
>         if keys:
> -            # Take into account the keys found, as well
> -            sort_func = lambda x: "%s%s" % ("\0".join(sorted(x[1])), str(x[0]))
> +            # Take into account the keys found, their original order,
> +            # and number of fields hit as well
> +            sort_func = lambda x: (-sum((critweights[y] for y in x[1])),
> +                                   "\0".join(sorted(x[1])), -len(x[2]), x[0])
>         yielded = {}
>         for val in reversed(sorted(sorted_lists)):
>             for (po, ks, vs) in sorted(sorted_lists[val], key=sort_func):

I'm inclined to ACK this. Have you noticed any impact on speed?

-sv


More information about the Yum-devel mailing list