一亩三分地论坛

 找回密码
 获取更多干货,去instant注册!

扫码关注一亩三分地公众号
查看: 4135|回复: 2
收起左侧

[CS61A]Mapproj

[复制链接] |试试Instant~ |关注本帖
sky420 发表于 2015-5-20 11:19:39 | 显示全部楼层 |阅读模式

[其他]CS61A #2 - 04@UCBerkeley

注册一亩三分地论坛,查看更多干货!

您需要 登录 才可以下载或查看,没有帐号?获取更多干货,去instant注册!

x
这个project是optional的,而且有些内容还没有学过,似乎John不上课的内容顺序调整了。有兴趣的同学可以看一下。
http://gaotx.com/cs61a/proj/maps/
goldpanda 发表于 2015-5-21 02:05:19 | 显示全部楼层
"""Utilities for Maps"""

from math import sqrt
from random import sample

# Rename the built-in zip (http://docs.python.org/3/library/functions.html#zip)
_zip = zip

def map_and_filter(s, map_fn, filter_fn):
    """Apply MAP_FN to each element of S that FILTER_FN accepts.

    Elements are visited in order. FILTER_FN is called on the original
    element, and MAP_FN is called only on elements that pass the filter.

    >>> square = lambda x: x * x
    >>> is_odd = lambda x: x % 2 == 1
    >>> map_and_filter([1, 2, 3, 4, 5], square, is_odd)
    [1, 9, 25]
    """
    mapped = []
    for element in s:
        if filter_fn(element):
            mapped.append(map_fn(element))
    return mapped

def key_of_min_value(d):
    """Return the key in dict D that corresponds to the minimum value of D.

    When several keys share the minimum value, the one that comes first in
    D's iteration order is returned. Raises ValueError if D is empty.

    >>> letters = {'a': 6, 'b': 5, 'c': 4, 'd': 5}
    >>> min(letters)
    'a'
    >>> key_of_min_value(letters)
    'c'
    """
    # Iterating a dict yields its keys, so no intermediate list is needed;
    # each key is ranked by the value stored under it.
    return min(d, key=d.__getitem__)

def zip(*sequences):
    """Return a list of lists grouping the i-th elements of SEQUENCES.

    The i-th inner list holds the i-th element of every argument, in
    argument order. The built-in zip (bound to _zip) does the pairing; each
    group it yields is converted to a list.

    >>> zip(range(0, 3), range(3, 6))
    [[0, 3], [1, 4], [2, 5]]
    >>> for a, b in zip([1, 2, 3], [4, 5, 6]):
    ...     print(a, b)
    1 4
    2 5
    3 6
    >>> for triple in zip(['a', 'b', 'c'], [1, 2, 3], ['do', 're', 'mi']):
    ...     print(triple)
    ['a', 1, 'do']
    ['b', 2, 're']
    ['c', 3, 'mi']
    """
    return [list(group) for group in _zip(*sequences)]

def enumerate(s, start=0):
    """Return a list of lists, where the i-th list contains i+start and the
    i-th element of S.

    S may be any iterable, including one without a length such as an
    iterator or generator.

    >>> enumerate([6, 1, 'a'])
    [[0, 6], [1, 1], [2, 'a']]
    >>> enumerate('five', 5)
    [[5, 'f'], [6, 'i'], [7, 'v'], [8, 'e']]
    >>> enumerate(iter('ab'), 1)
    [[1, 'a'], [2, 'b']]
    """
    # This function shadows the built-in name, so the built-in is reached
    # through the builtins module to avoid calling ourselves.
    import builtins
    return [[offset + start, item] for offset, item in builtins.enumerate(s)]

def distance(pos1, pos2):
    """Compute the Euclidean distance between the pairs POS1 and POS2.

    Only the first two coordinates of each position are used.

    >>> distance([1, 2], [4, 6])
    5.0
    """
    delta_first = pos1[0] - pos2[0]
    delta_second = pos1[1] - pos2[1]
    return sqrt(delta_first ** 2 + delta_second ** 2)

def mean(lst):
    """Compute the arithmetic mean of the numbers in sequence LST.

    LST must be non-empty; an empty sequence trips the assertion below.

    >>> mean([-1, 3])
    1.0
    >>> mean([0, -3, 2, -1])
    -0.5
    """
    count = len(lst)
    assert count > 0, 'cannot find mean of empty sequence'
    total = sum(lst)
    return total / count

"""Data Abstractions"""

from utils import mean

# Reviews

def make_review(restaurant_name, rating):
    """Construct a review: the two-element list [RESTAURANT_NAME, RATING]."""
    review = [restaurant_name, rating]
    return review

def review_restaurant_name(review):
    """Select the reviewed restaurant's name (a string) from REVIEW."""
    name = review[0]
    return name

def review_rating(review):
    """Select the number of stars given (1 to 5) from REVIEW."""
    rating = review[1]
    return rating

# Users

def make_user(name, reviews):
    """Construct a user: the list [NAME, reviews-by-restaurant-name].

    The second element is a dictionary mapping each reviewed restaurant's
    name to its review. A later review of the same restaurant replaces an
    earlier one.
    """
    reviews_by_restaurant = {}
    for review in reviews:
        reviews_by_restaurant[review_restaurant_name(review)] = review
    return [name, reviews_by_restaurant]

def user_name(user):
    """Select the USER's name (a string)."""
    name = user[0]
    return name

def user_reviews(user):
    """Select the USER's dictionary from restaurant names to reviews."""
    reviews = user[1]
    return reviews

### === +++ USER ABSTRACTION BARRIER +++ === ###

def user_reviewed_restaurants(user, restaurants):
    """Collect the restaurants in RESTAURANTS that USER has reviewed.

    Arguments:
    user -- a user
    restaurants -- a dictionary from restaurant names to restaurants

    Returns a new dictionary from name to restaurant. Names the user
    reviewed that are missing from RESTAURANTS are left out.
    """
    reviewed = {}
    for name in user_reviews(user).keys():
        if name in restaurants:
            reviewed[name] = restaurants[name]
    return reviewed

def user_rating(user, restaurant_name):
    """Look up the rating USER gave to RESTAURANT_NAME.

    Raises KeyError if USER has no review under that name.
    """
    review = user_reviews(user)[restaurant_name]
    return review_rating(review)

# Restaurants

def make_restaurant(name, location, categories, price, reviews):
    """Construct a restaurant, represented as a dictionary.

    The dictionary has the keys 'name', 'location', 'categories', 'price'
    and 'reviews', each holding the argument of the same name.
    """
    # This starter representation may be changed freely, including adding
    # more entries to the dictionary.
    restaurant = {}
    restaurant['name'] = name
    restaurant['location'] = location
    restaurant['categories'] = categories
    restaurant['price'] = price
    restaurant['reviews'] = reviews
    return restaurant

def restaurant_name(restaurant):
    """Select the value stored under 'name' in RESTAURANT."""
    name = restaurant['name']
    return name

def restaurant_location(restaurant):
    """Select the value stored under 'location' in RESTAURANT."""
    location = restaurant['location']
    return location

def restaurant_categories(restaurant):
    """Select the value stored under 'categories' in RESTAURANT."""
    categories = restaurant['categories']
    return categories

def restaurant_price(restaurant):
    """Select the value stored under 'price' in RESTAURANT."""
    price = restaurant['price']
    return price

def restaurant_ratings(restaurant):
    """Collect the ratings (numbers from 1 to 5) of RESTAURANT's reviews.

    The ratings are listed in the order of the reviews stored under
    'reviews'.
    """
    ratings = []
    for review in restaurant['reviews']:
        ratings.append(review_rating(review))
    return ratings

### === +++ RESTAURANT ABSTRACTION BARRIER +++ === ###

def restaurant_num_ratings(restaurant):
    """Count the ratings RESTAURANT has received."""
    ratings = restaurant_ratings(restaurant)
    return len(ratings)

def restaurant_mean_rating(restaurant):
    """Compute the average of RESTAURANT's ratings using mean()."""
    ratings = restaurant_ratings(restaurant)
    return mean(ratings)

"""A Yelp-powered Restaurant Recommendation Program"""

from abstractions import *
from utils import distance, mean, zip, enumerate, sample
from visualize import draw_map
from data import RESTAURANTS, CATEGORIES, USER_FILES, load_user_file
from ucb import main, trace, interact

def find_closest(location, centroids):
    """Return the item in CENTROIDS that is closest to LOCATION. If two
    centroids are equally close, return the first one.

    Raises ValueError if CENTROIDS is empty.

    >>> find_closest([3, 4], [[0, 0], [2, 3], [4, 3], [5, 5]])
    [2, 3]
    """
    # min() keeps the first of several equal minima, which provides the
    # documented tie-breaking rule without copying CENTROIDS.
    return min(centroids, key=lambda centroid: distance(location, centroid))

def group_by_first(pairs):
    """Return a list of value lists, one for each unique key in the
    [key, value] PAIRS, holding all values paired with that key.

    Groups appear in order of each key's first occurrence, and values keep
    their original order within a group. Keys are compared with ==, so they
    do not need to be hashable (lists work as keys).

    Arguments:
    pairs -- an iterable of pairs; it is traversed exactly once

    >>> example = [ [1, 2], [3, 2], [2, 4], [1, 3], [3, 1], [1, 2] ]
    >>> group_by_first(example)
    [[2, 3, 2], [2, 1], [4]]
    """
    # keys[i] is the key whose values are collected in groups[i]. A dict is
    # deliberately not used because keys may be unhashable.
    keys, groups = [], []
    for key, value in pairs:
        if key in keys:
            groups[keys.index(key)].append(value)
        else:
            keys.append(key)
            groups.append([value])
    return groups

def group_by_centroid(restaurants, centroids):
    """Partition RESTAURANTS by their nearest item in CENTROIDS.

    Each restaurant is paired with the centroid find_closest() picks for its
    location, and group_by_first() then gathers the restaurants that share a
    centroid. Every restaurant appears once in the result and no group is
    empty.
    """
    pairs = []
    for restaurant in restaurants:
        nearest = find_closest(restaurant_location(restaurant), centroids)
        pairs.append([nearest, restaurant])
    return group_by_first(pairs)

def find_centroid(restaurants):
    """Compute the centroid of the locations of RESTAURANTS.

    The result is a two-element list: the mean of the first coordinates and
    the mean of the second coordinates.
    """
    first_coords, second_coords = [], []
    for restaurant in restaurants:
        location = restaurant_location(restaurant)
        first_coords.append(location[0])
        second_coords.append(location[1])
    return [mean(first_coords), mean(second_coords)]

def k_means(restaurants, k, max_updates=100):
    """Cluster RESTAURANTS by location into K groups and return the centroids.

    Starting from the locations of K randomly sampled restaurants, the
    centroids are recomputed until they stop changing or MAX_UPDATES rounds
    have run.
    """
    assert len(restaurants) >= k, 'Not enough restaurants to cluster'
    # Select initial centroids randomly by choosing K different restaurants
    centroids = [restaurant_location(r) for r in sample(restaurants, k)]
    previous = []
    updates = 0

    while previous != centroids and updates < max_updates:
        previous = centroids
        clusters = group_by_centroid(restaurants, previous)
        centroids = [find_centroid(cluster)
                     for cluster in clusters if len(cluster) != 0]
        updates += 1
    return centroids

def find_predictor(user, restaurants, feature_fn):
    """Return a rating predictor (a function from restaurants to ratings),
    for USER by performing least-squares linear regression using FEATURE_FN
    on the items in RESTAURANTS. Also, return the R^2 value of this model.

    Arguments:
    user -- A user
    restaurants -- A sequence of restaurants, each reviewed by USER
    feature_fn -- A function that takes a restaurant and returns a number

    Returns the pair (predictor, r_squared). When every feature value or
    every rating is the same, no line can be fitted; the predictor then
    returns the user's mean rating and r_squared is 0.
    """
    reviews_by_user = {review_restaurant_name(review): review_rating(review)
                       for review in user_reviews(user).values()}

    # xs: feature value of each restaurant; ys: the user's rating of it.
    xs = [feature_fn(r) for r in restaurants]
    ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

    mean_x, mean_y = mean(xs), mean(ys)
    s_xx = sum([(x - mean_x) ** 2 for x in xs])
    s_yy = sum([(y - mean_y) ** 2 for y in ys])
    s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

    if s_xx == 0 or s_yy == 0:
        # Zero variance in the feature or in the ratings would divide by
        # zero below; fall back to a constant predictor that explains none
        # of the variance.
        b, r_squared = 0.0, 0.0
    else:
        b = s_xy / s_xx
        r_squared = s_xy ** 2 / (s_xx * s_yy)
    a = mean_y - b * mean_x

    def predictor(restaurant):
        return b * feature_fn(restaurant) + a

    return predictor, r_squared

def best_predictor(user, restaurants, feature_fns):
    """Pick the predictor with the highest R^2 among those built from
    FEATURE_FNS for the restaurants USER has reviewed.

    Arguments:
    user -- A user
    restaurants -- A dictionary from restaurant names to restaurants
    feature_fns -- A sequence of functions that each takes a restaurant

    If several features share the highest R^2, the earliest one wins.
    """
    reviewed = list(user_reviewed_restaurants(user, restaurants).values())

    # Each candidate is a (predictor, r_squared) pair from find_predictor.
    candidates = [find_predictor(user, reviewed, feature_fn)
                  for feature_fn in feature_fns]
    best = max(candidates, key=lambda candidate: candidate[1])
    return best[0]

def rate_all(user, restaurants, feature_functions):
    """Return the predicted ratings of RESTAURANTS by USER using the best
    predictor based on a function from FEATURE_FUNCTIONS.

    Arguments:
    user -- A user
    restaurants -- A dictionary from restaurant names to restaurants
    feature_functions -- A sequence of functions that each takes a restaurant

    Returns a dictionary from restaurant name to rating: the user's own
    rating where one exists, otherwise the predicted rating.
    """
    # Use the best predictor for the user, learned from *all* restaurants
    # (Note: the name RESTAURANTS is bound to a dictionary of all restaurants)
    predictor = best_predictor(user, RESTAURANTS, feature_functions)
    reviewed = user_reviewed_restaurants(user, restaurants)

    ratings = {}
    for name, restaurant in restaurants.items():
        if name in reviewed:
            ratings[name] = user_rating(user, name)
        else:
            # Predict from the restaurant that was passed in rather than
            # looking the name up again in the global RESTAURANTS.
            ratings[name] = predictor(restaurant)
    return ratings

def search(query, restaurants):
    """Collect every restaurant in RESTAURANTS that has QUERY as a category.

    Arguments:
    query -- A string
    restaurants -- A sequence of restaurants

    Matches are returned as a list in their original order.
    """
    matches = []
    for restaurant in restaurants:
        if query in restaurant_categories(restaurant):
            matches.append(restaurant)
    return matches

def feature_set():
    """Build the list of feature functions used for prediction.

    Each feature takes a restaurant and returns a number: mean rating,
    price, number of ratings, and the two location coordinates.
    """
    def first_coordinate(r):
        return restaurant_location(r)[0]

    def second_coordinate(r):
        return restaurant_location(r)[1]

    features = [restaurant_mean_rating,
                restaurant_price,
                restaurant_num_ratings,
                first_coordinate,
                second_coordinate]
    return features

@main
def main(*args):
    """Parse command-line options, select and rate restaurants, draw the map.

    ARGS is accepted but never read here; the options come from argparse:
      -u/--user     user file name without the '.dat' suffix
                    (default 'test_user')
      -k/--k        number of clusters for k-means; when omitted (or 0) each
                    restaurant's location is used as its own centroid
      -q/--query    keep only restaurants that have this category
      -p/--predict  compute ratings with rate_all() instead of using only
                    the user's own ratings
    """
    # NOTE(review): the decorator `main` is imported from ucb; presumably it
    # calls this function when the file is run as a script -- confirm in ucb.
    import argparse
    parser = argparse.ArgumentParser(
        description='Run Recommendations',
        formatter_class=argparse.RawTextHelpFormatter
    )
    # The help text shows three randomly sampled user file names as examples.
    parser.add_argument('-u', '--user', type=str, choices=USER_FILES,
                        default='test_user',
                        metavar='USER',
                        help='user file, e.g.\n' +
                        '{{{}}}'.format(','.join(sample(USER_FILES, 3))))
    parser.add_argument('-k', '--k', type=int, help='for k-means')
    # The two adjacent string literals are concatenated before .format() is
    # applied; the help text shows three randomly sampled categories.
    parser.add_argument('-q', '--query', choices=CATEGORIES,
                        metavar='QUERY',
                        help='search for restaurants by category e.g.\n'
                        '{{{}}}'.format(','.join(sample(CATEGORIES, 3))))
    parser.add_argument('-p', '--predict', action='store_true',
                        help='predict ratings for all restaurants')
    args = parser.parse_args()

    # Select restaurants using a category query
    if args.query:
        results = search(args.query, RESTAURANTS.values())
        restaurants = {restaurant_name(r): r for r in results}
    else:
        restaurants = RESTAURANTS

    # Load a user
    assert args.user, 'A --user is required to draw a map'
    user = load_user_file('{}.dat'.format(args.user))

    # Collect ratings
    if args.predict:
        ratings = rate_all(user, restaurants, feature_set())
    else:
        # Without --predict only the restaurants the user reviewed are kept.
        restaurants = user_reviewed_restaurants(user, restaurants)
        ratings = {name: user_rating(user, name) for name in restaurants}

    # Draw the visualization
    restaurant_list = list(restaurants.values())
    if args.k:
        # k is capped at the number of restaurants so that k_means' size
        # assertion cannot fail.
        centroids = k_means(restaurant_list, min(args.k, len(restaurant_list)))
    else:
        centroids = [restaurant_location(r) for r in restaurant_list]
    draw_map(centroids, restaurant_list, ratings)

评分

1

查看全部评分

回复 支持 反对

使用道具 举报

liyimeng 发表于 2016-2-8 12:25:56 | 显示全部楼层
写了大概7,8个小时。。。感觉代码本身不难,但是挺抽象的。。。
maps.png

代码
utils.py
  1. """Utilities for Maps"""

  2. from math import sqrt
  3. from random import sample

  4. # Rename the built-in zip (http://docs.python.org/3/library/functions.html#zip)
  5. _zip = zip

  def map_and_filter(s, map_fn, filter_fn):
      """Return a new list containing the result of calling MAP_FN on each
      element of sequence S for which FILTER_FN returns a true value.

      >>> square = lambda x: x * x
      >>> is_odd = lambda x: x % 2 == 1
      >>> map_and_filter([1, 2, 3, 4, 5], square, is_odd)
      [1, 9, 25]
      """
      return [map_fn(x) for x in s if filter_fn(x)]

  def key_of_min_value(d):
      """Return the key in dict D that corresponds to the minimum value of D.

      When several keys share the minimum value, the one that comes first in
      D's iteration order is returned. Raises ValueError if D is empty.

      >>> letters = {'a': 6, 'b': 5, 'c': 4, 'd': 5}
      >>> min(letters)
      'a'
      >>> key_of_min_value(letters)
      'c'
      """
      # Iterating a dict yields its keys, so no intermediate list is needed.
      return min(d, key=d.__getitem__)

  def zip(*sequences):
      """Return a list of lists, where the i-th list contains the i-th
      element from each of the argument sequences.

      The built-in zip (bound to _zip) does the pairing; each group it
      yields is converted to a list.

      >>> zip(range(0, 3), range(3, 6))
      [[0, 3], [1, 4], [2, 5]]
      >>> for a, b in zip([1, 2, 3], [4, 5, 6]):
      ...     print(a, b)
      1 4
      2 5
      3 6
      >>> for triple in zip(['a', 'b', 'c'], [1, 2, 3], ['do', 're', 'mi']):
      ...     print(triple)
      ['a', 1, 'do']
      ['b', 2, 're']
      ['c', 3, 'mi']
      """
      return [list(group) for group in _zip(*sequences)]

  def enumerate(s, start=0):
      """Return a list of lists, where the i-th list contains i+start and the
      i-th element of S.

      S may be any iterable, including one without a length such as an
      iterator or generator.

      >>> enumerate([6, 1, 'a'])
      [[0, 6], [1, 1], [2, 'a']]
      >>> enumerate('five', 5)
      [[5, 'f'], [6, 'i'], [7, 'v'], [8, 'e']]
      """
      # This function shadows the built-in name, so the built-in is reached
      # through the builtins module to avoid calling ourselves.
      import builtins
      return [[offset + start, item] for offset, item in builtins.enumerate(s)]

  def distance(pos1, pos2):
      """Return the Euclidean distance between POS1 and POS2, which are pairs.

      >>> distance([1, 2], [4, 6])
      5.0
      """
      return sqrt((pos1[0] - pos2[0]) ** 2 + (pos1[1] - pos2[1]) ** 2)

  def mean(lst):
      """Return the arithmetic mean of a sequence of numbers.

      LST must be non-empty; an empty sequence trips the assertion below.

      >>> mean([-1, 3])
      1.0
      >>> mean([0, -3, 2, -1])
      -0.5
      """
      assert len(lst) > 0, 'cannot find mean of empty sequence'
      return sum(lst) / len(lst)
复制代码
abstractions.py
  1. """Data Abstractions"""

  2. from utils import mean

  3. # Reviews

  def make_review(restaurant_name, rating):
      """Return a review: the two-element list [RESTAURANT_NAME, RATING]."""
      return [restaurant_name, rating]

  def review_restaurant_name(review):
      """Return the reviewed restaurant's name (string)."""
      return review[0]

  def review_rating(review):
      """Return the number of stars given (1 to 5)."""
      return review[1]

  13. # Users

  def make_user(name, reviews):
      """Return a user: the list [NAME, reviews-by-restaurant-name].

      The second element maps each reviewed restaurant's name to its review.
      """
      return [name, {review_restaurant_name(r): r for r in reviews}]

  def user_name(user):
      """Return the USER's name (string)."""
      return user[0]

  def user_reviews(user):
      """Return a dictionary from restaurant names to reviews by the USER."""
      return user[1]

  23. ### === +++ USER ABSTRACTION BARRIER +++ === ###

  def user_reviewed_restaurants(user, restaurants):
      """Return the subset of restaurants reviewed by USER.

      Arguments:
      user -- a user
      restaurants -- a dictionary from restaurant names to restaurants

      Names the user reviewed that are missing from RESTAURANTS are left out.
      """
      names = user_reviews(user).keys()
      return {name: restaurants[name] for name in names if name in restaurants}

  def user_rating(user, restaurant_name):
      """Return the rating given for RESTAURANT_NAME by USER."""
      return review_rating(user_reviews(user)[restaurant_name])

  35. # Restaurants

  def make_restaurant(name, location, categories, price, reviews):
      """Return a restaurant, implemented as a dictionary.

      The dictionary has the keys 'name', 'location', 'categories', 'price'
      and 'reviews', each holding the argument of the same name.
      """
      # You may change this starter implementation however you wish, including
      # adding more items to the dictionary below.
      return {'name': name,
              'location': location,
              'categories': categories,
              'price': price,
              'reviews': reviews
              }

  def restaurant_name(restaurant):
      """Return the value stored under 'name' in RESTAURANT."""
      return restaurant['name']

  def restaurant_location(restaurant):
      """Return the value stored under 'location' in RESTAURANT."""
      return restaurant['location']

  def restaurant_categories(restaurant):
      """Return the value stored under 'categories' in RESTAURANT."""
      return restaurant['categories']

  def restaurant_price(restaurant):
      """Return the value stored under 'price' in RESTAURANT."""
      return restaurant['price']

  def restaurant_ratings(restaurant):
      """Return a list of ratings (numbers from 1 to 5)."""
      return [review_rating(r) for r in restaurant['reviews']]

  57. ### === +++ RESTAURANT ABSTRACTION BARRIER +++ === ###

  def restaurant_num_ratings(restaurant):
      """Return the number of ratings for RESTAURANT."""
      return len(restaurant_ratings(restaurant))

  def restaurant_mean_rating(restaurant):
      """Return the average rating for RESTAURANT."""
      return mean(restaurant_ratings(restaurant))
复制代码
recommend.py
  1. """A Yelp-powered Restaurant Recommendation Program"""

  2. from abstractions import *
  3. from utils import distance, mean, zip, enumerate, sample
  4. from visualize import draw_map
  5. from data import RESTAURANTS, CATEGORIES, USER_FILES, load_user_file
  6. from ucb import main, trace, interact

  def find_closest(location, centroids):
      """Return the item in CENTROIDS that is closest to LOCATION. If two
      centroids are equally close, return the first one.

      Raises ValueError if CENTROIDS is empty.

      >>> find_closest([3, 4], [[0, 0], [2, 3], [4, 3], [5, 5]])
      [2, 3]
      """
      # min() keeps the first of several equal minima, which provides the
      # documented tie-breaking rule; no index-to-distance table is needed.
      return min(centroids, key=lambda centroid: distance(centroid, location))

  def group_by_first(pairs):
      """Return a list of value lists, one for each unique key in the
      [key, value] PAIRS, holding all values paired with that key.

      Groups appear in order of each key's first occurrence. Keys are
      compared with ==, so they do not need to be hashable.

      Arguments:
      pairs -- an iterable of pairs; it is traversed exactly once

      >>> example = [ [1, 2], [3, 2], [2, 4], [1, 3], [3, 1], [1, 2] ]
      >>> group_by_first(example)
      [[2, 3, 2], [2, 1], [4]]
      """
      # keys[i] is the key whose values are collected in groups[i]. A dict is
      # deliberately not used because keys may be unhashable.
      keys, groups = [], []
      for key, value in pairs:
          if key in keys:
              groups[keys.index(key)].append(value)
          else:
              keys.append(key)
              groups.append([value])
      return groups

  def group_by_centroid(restaurants, centroids):
      """Return a list of lists, where each list contains all restaurants nearest
      to some item in CENTROIDS. Each item in RESTAURANTS should appear once in
      the result, along with the other restaurants nearest to the same centroid.
      No empty lists should appear in the result.
      """
      # Pair every restaurant with its nearest centroid, then group by it.
      pairs = [[find_closest(restaurant_location(r), centroids), r]
               for r in restaurants]
      return group_by_first(pairs)


  def find_centroid(restaurants):
      """Return the centroid of the locations of RESTAURANTS.

      The result is a two-element list: the mean of the first coordinates
      and the mean of the second coordinates.
      """
      locations = [restaurant_location(r) for r in restaurants]
      first_coords = [location[0] for location in locations]
      second_coords = [location[1] for location in locations]
      return [mean(first_coords), mean(second_coords)]

  def k_means(restaurants, k, max_updates=100):
      """Use k-means to group RESTAURANTS by location into K clusters.

      Starting from the locations of K randomly sampled restaurants, the
      centroids are recomputed until they stop changing or MAX_UPDATES
      rounds have run. Returns the final list of centroids.
      """
      assert len(restaurants) >= k, 'Not enough restaurants to cluster'
      old_centroids, n = [], 0
      # Select initial centroids randomly by choosing K different restaurants
      centroids = [restaurant_location(r) for r in sample(restaurants, k)]

      while old_centroids != centroids and n < max_updates:
          old_centroids = centroids
          clusters = group_by_centroid(restaurants, centroids)
          centroids = [find_centroid(cluster) for cluster in clusters]
          n += 1
      return centroids

  def find_predictor(user, restaurants, feature_fn):
      """Return a rating predictor (a function from restaurants to ratings),
      for USER by performing least-squares linear regression using FEATURE_FN
      on the items in RESTAURANTS. Also, return the R^2 value of this model.

      Arguments:
      user -- A user
      restaurants -- A sequence of restaurants, each reviewed by USER
      feature_fn -- A function that takes a restaurant and returns a number

      Returns the pair (predictor, r_squared). When every feature value or
      every rating is the same, no line can be fitted; the predictor then
      returns the user's mean rating and r_squared is 0.
      """
      reviews_by_user = {review_restaurant_name(review): review_rating(review)
                         for review in user_reviews(user).values()}

      # xs: the extracted values for each restaurant in restaurants
      # ys: the ratings for the restaurants in restaurants
      xs = [feature_fn(r) for r in restaurants]
      ys = [reviews_by_user[restaurant_name(r)] for r in restaurants]

      # The means are computed once, not once per element of each sum.
      mean_x, mean_y = mean(xs), mean(ys)
      s_xx = sum([(x - mean_x) ** 2 for x in xs])
      s_yy = sum([(y - mean_y) ** 2 for y in ys])
      s_xy = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])

      if s_xx == 0 or s_yy == 0:
          # Zero variance in the feature or in the ratings would divide by
          # zero below; fall back to a constant predictor that explains none
          # of the variance.
          b, r_squared = 0.0, 0.0
      else:
          b = s_xy / s_xx
          r_squared = s_xy * s_xy / (s_xx * s_yy)
      a = mean_y - b * mean_x

      def predictor(restaurant):
          return b * feature_fn(restaurant) + a

      return predictor, r_squared

  def best_predictor(user, restaurants, feature_fns):
      """Find the feature within FEATURE_FNS that gives the highest R^2 value
      for predicting ratings by the user; return a predictor using that feature.

      Arguments:
      user -- A user
      restaurants -- A dictionary from restaurant names to restaurants
      feature_fns -- A sequence of functions that each takes a restaurant

      If several features share the highest R^2, the earliest one wins.
      """
      reviewed = list(user_reviewed_restaurants(user, restaurants).values())

      # Each candidate is a (predictor, r_squared) pair from find_predictor.
      candidates = [find_predictor(user, reviewed, feature_fn)
                    for feature_fn in feature_fns]
      best = max(candidates, key=lambda candidate: candidate[1])
      return best[0]


  def rate_all(user, restaurants, feature_functions):
      """Return the predicted ratings of RESTAURANTS by USER using the best
      predictor based on a function from FEATURE_FUNCTIONS.

      Arguments:
      user -- A user
      restaurants -- A dictionary from restaurant names to restaurants
      feature_functions -- A sequence of functions that each takes a restaurant

      Returns a dictionary with the same keys as RESTAURANTS: the user's own
      rating where one exists, otherwise the predicted rating.
      """
      # Use the best predictor for the user, learned from *all* restaurants
      # (Note: the name RESTAURANTS is bound to a dictionary of all restaurants)
      predictor = best_predictor(user, RESTAURANTS, feature_functions)
      # Membership is tested by key in this dictionary rather than by
      # comparing restaurant values against a list.
      reviewed = user_reviewed_restaurants(user, restaurants)

      ratings = {}
      for key, restaurant in restaurants.items():
          if key in reviewed:
              ratings[key] = user_rating(user, restaurant_name(restaurant))
          else:
              ratings[key] = predictor(restaurant)
      return ratings


  def search(query, restaurants):
      """Return each restaurant in RESTAURANTS that has QUERY as a category.

      Arguments:
      query -- A string
      restaurants -- A sequence of restaurants

      Matches are returned as a list in their original order.
      """
      return [r for r in restaurants if query in restaurant_categories(r)]


  def feature_set():
      """Return a sequence of feature functions.

      Each feature takes a restaurant and returns a number: mean rating,
      price, number of ratings, and the two location coordinates.
      """
      return [restaurant_mean_rating,
              restaurant_price,
              restaurant_num_ratings,
              lambda r: restaurant_location(r)[0],
              lambda r: restaurant_location(r)[1]]

  @main
  def main(*args):
      """Parse command-line options, select and rate restaurants, draw the map.

      ARGS is accepted but never read here; the options come from argparse:
        -u/--user     user file name without the '.dat' suffix
                      (default 'test_user')
        -k/--k        number of clusters for k-means; when omitted (or 0)
                      each restaurant's location is used as its own centroid
        -q/--query    keep only restaurants that have this category
        -p/--predict  compute ratings with rate_all() instead of using only
                      the user's own ratings
      """
      # NOTE(review): the decorator `main` is imported from ucb; presumably it
      # calls this function when the file is run as a script -- confirm in ucb.
      import argparse
      parser = argparse.ArgumentParser(
          description='Run Recommendations',
          formatter_class=argparse.RawTextHelpFormatter
      )
      # The help text shows three randomly sampled user file names as examples.
      parser.add_argument('-u', '--user', type=str, choices=USER_FILES,
                          default='test_user',
                          metavar='USER',
                          help='user file, e.g.\n' +
                          '{{{}}}'.format(','.join(sample(USER_FILES, 3))))
      parser.add_argument('-k', '--k', type=int, help='for k-means')
      # The two adjacent string literals are concatenated before .format() is
      # applied; the help text shows three randomly sampled categories.
      parser.add_argument('-q', '--query', choices=CATEGORIES,
                          metavar='QUERY',
                          help='search for restaurants by category e.g.\n'
                          '{{{}}}'.format(','.join(sample(CATEGORIES, 3))))
      parser.add_argument('-p', '--predict', action='store_true',
                          help='predict ratings for all restaurants')
      args = parser.parse_args()

      # Select restaurants using a category query
      if args.query:
          results = search(args.query, RESTAURANTS.values())
          restaurants = {restaurant_name(r): r for r in results}
      else:
          restaurants = RESTAURANTS

      # Load a user
      assert args.user, 'A --user is required to draw a map'
      user = load_user_file('{}.dat'.format(args.user))

      # Collect ratings
      if args.predict:
          ratings = rate_all(user, restaurants, feature_set())
      else:
          # Without --predict only the restaurants the user reviewed are kept.
          restaurants = user_reviewed_restaurants(user, restaurants)
          ratings = {name: user_rating(user, name) for name in restaurants}

      # Draw the visualization
      restaurant_list = list(restaurants.values())
      if args.k:
          # k is capped at the number of restaurants so that k_means' size
          # assertion cannot fail.
          centroids = k_means(restaurant_list, min(args.k, len(restaurant_list)))
      else:
          centroids = [restaurant_location(r) for r in restaurant_list]
      draw_map(centroids, restaurant_list, ratings)
复制代码
回复 支持 反对

使用道具 举报

本版积分规则

请点这里访问我们的新网站:一亩三分地Instant.

Instant搜索更强大,不扣积分,内容组织的更好更整洁!目前仍在beta版本,努力完善中!反馈请点这里

关闭

一亩三分地推荐上一条 /5 下一条

手机版|小黑屋|一亩三分地论坛声明 ( 沪ICP备11015994号 )

custom counter

GMT+8, 2016-12-6 13:56

Powered by Discuz! X3

© 2001-2013 Comsenz Inc. Design By HUXTeam

快速回复 返回顶部 返回列表