#!/usr/bin/env python
# encoding: utf-8
"""
learning.py

Created by Cody Mays on 2009-10-20.

Small rating-prediction exercise over the ``allRatings`` matrix and the
``names`` list provided by ``mldata``.  ``allRatings[user][movie]`` holds one
rating per user/movie pair; task1 treats a value of 0 as "not rated".

* task1 -- per-movie and per-user average ratings.
* task2 -- dot-product similarity between one user and everybody else.
* task3 -- randomised incremental updates of the per-movie / per-user
  factors, followed by a movie recommendation.
"""

import sys
import os
import random
from mldata import *

# Index (into allRatings / names) of the user the script predicts for.
MY_USER_ID = 16
# Default step size used by update().
LEARNING_RATE = 0.0001

# Global vars: one factor per movie and one per user.  task1() overwrites
# them with plain averages, task3() then adjusts them through update().
# Sized from the data so that every movie column has exactly one slot.
moviesAvg = [1.0] * (len(allRatings[0]) if allRatings else 0)
userAvg = [1.0] * len(allRatings)


def _rated_average(ratings):
    """Return the mean of the non-zero entries of *ratings*, or 0 if none."""
    rated = [rating for rating in ratings if rating != 0]
    if not rated:
        return 0
    # float() keeps Python 2 from truncating when the ratings are integers.
    return sum(rated) / float(len(rated))


def task1():
    """Compute, store and print the average rating per movie and per user.

    Entries equal to 0 are skipped.  Results are written into the
    module-level ``moviesAvg`` and ``userAvg`` lists; a movie or user without
    any non-zero rating gets an average of 0.
    """
    # Movie average
    print('Movie averages:\n')
    for movie in range(len(moviesAvg)):
        moviesAvg[movie] = _rated_average([row[movie] for row in allRatings])
        print('# %s : %s' % (movie, moviesAvg[movie]))

    # User average...
    print('')
    print('User averages: ')
    for user, row in enumerate(allRatings):
        userAvg[user] = _rated_average(row)
        print('%s : %s' % (names[user], userAvg[user]))


def task2():
    """Compare user ``MY_USER_ID`` with every other user and print the result.

    Each user's ratings are centred on that user's entry in ``userAvg``; the
    score of a user is the dot product of their centred vector with the
    centred vector of ``MY_USER_ID``.

    Raises:
        IndexError: if ``MY_USER_ID`` is not a valid row of ``allRatings``.
    """
    me = MY_USER_ID

    # Normalize scores vector and store
    normVals = [[rating - userAvg[user] for rating in row]
                for user, row in enumerate(allRatings)]

    # Find closest correlation: the highest dot product among the other users.
    dotProducts = [0] * len(allRatings)
    maxCV = None
    for user in range(len(allRatings)):
        if user == me:
            continue
        dotProducts[user] = sum(
            mine * theirs
            for mine, theirs in zip(normVals[me], normVals[user]))
        if maxCV is None or dotProducts[user] > dotProducts[maxCV]:
            maxCV = user

    # Try to find closest user.  `me` is skipped: its distance to itself is
    # always 0, so it would win every time.
    # NOTE(review): dotProducts[me] is never computed and stays 0, so this
    # selects the other user whose dot product is nearest to 0 -- confirm
    # that this is the intended meaning of "closest".
    closest = None
    for user in range(len(allRatings)):
        if user == me:
            continue
        if (closest is None or
                abs(dotProducts[me] - dotProducts[closest]) >
                abs(dotProducts[me] - dotProducts[user])):
            closest = user

    print('The person with the highest C is %s' % (names[maxCV],))
    # Report the user that was actually selected, not the last loop index.
    print('The closest user to you is %s' % (names[closest],))


def update(user, movie, actualRating, learningRate=LEARNING_RATE):
    """Nudge the factors of *user* and *movie* towards *actualRating*.

    The prediction is ``moviesAvg[movie] * userAvg[user]``; both factors are
    moved in proportion to the prediction error.

    Args:
        user: index into ``userAvg``.
        movie: index into ``moviesAvg``.
        actualRating: the observed rating for this user/movie pair.
        learningRate: step size of the adjustment (defaults to 0.0001).
    """
    predictedRating = moviesAvg[movie] * userAvg[user]
    error = actualRating - predictedRating
    userAvg[user] += error * moviesAvg[movie] * learningRate
    # Deliberately uses the user factor that was just updated above, as the
    # original implementation did.
    moviesAvg[movie] += error * userAvg[user] * learningRate


def task3():
    """Run 500 random factor updates, then print extremes and a suggestion.

    After the updates the movies and users with the largest and smallest
    factors are reported, followed by the movie whose predicted rating
    (``userAvg[MY_USER_ID] * moviesAvg[movie]``) is highest.
    """
    me = MY_USER_ID
    lastMovie = len(moviesAvg) - 1
    lastUser = len(allRatings) - 1

    # NOTE(review): entries equal to 0 are passed to update() as real
    # ratings, whereas task1 treats 0 as "not rated" -- confirm intent.
    for _ in range(500):
        movie = random.randint(0, lastMovie)
        user = random.randint(0, lastUser)
        update(user, movie, allRatings[user][movie])

    # Determine the extremes from the final factors.  Tracking them while
    # training only ever looked at the pair that had just been sampled, so
    # the recorded winner could be stale by the time it was printed.
    movieIds = range(len(moviesAvg))
    userIds = range(len(userAvg))
    maxMovie = max(movieIds, key=lambda m: moviesAvg[m])
    minMovie = min(movieIds, key=lambda m: moviesAvg[m])
    maxUser = max(userIds, key=lambda u: userAvg[u])
    minUser = min(userIds, key=lambda u: userAvg[u])

    print('Most influential movie: %s' % (maxMovie,))
    print('Least influential movie: %s' % (minMovie,))
    print('Most influenced user: %s %s' % (maxUser, names[maxUser]))
    print('Least influenced user: %s %s' % (minUser, names[minUser]))

    # Predict my movie: every candidate is scored with *my* user factor.
    # max() returns the first best candidate, i.e. the lowest movie index
    # among ties.
    myMovie = max(movieIds, key=lambda m: userAvg[me] * moviesAvg[m])
    print('I should watch movie %s' % (myMovie,))


def main():
    """Run the three tasks in order; task2 and task3 rely on task1's output."""
    task1()
    task2()
    task3()


if __name__ == '__main__':
    main()