{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
"import os\n",
"import time\n",
"# Replace INSERT_PATH_HERE with the path of a file inside the directory that holds 1kmovies.csv;\n",
"# the notebook changes into that file's directory so the relative CSV path resolves.\n",
"os.chdir(os.path.dirname(INSERT_PATH_HERE))"
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import requests\n", "import numpy as np\n", "import pandas as pd\n", "import json\n", "\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "96 entities not found\n" ] } ], "source": [
"# Base URL of the KGvec2go REST endpoint that is queried once per entity.\n",
"KGVEC2GO_URL = \"http://kgvec2go.org/rest/get-vector/dbpedia/\"\n",
"# Length of the zero vector stored for entities the API returns no vector for.\n",
"VECTOR_DIM = 200\n",
"# Seconds to wait for each response so a stalled request cannot hang the cell.\n",
"REQUEST_TIMEOUT = 30\n",
"\n",
"df = pd.read_csv('1kmovies.csv', sep=\"\\t\")\n",
"\n",
"# Unique movie URIs in first-seen order.\n",
"entities = list(dict.fromkeys(df['Movie'].to_list()))\n",
"# Build dfX from the de-duplicated list so that row i of dfX always describes\n",
"# row i of dfXvectors, even when the CSV lists a movie more than once.\n",
"dfX = pd.DataFrame({'Movie': entities})\n",
"\n",
"not_found = 0\n",
"vectors = []\n",
"# One session reuses the HTTP connection across all requests.\n",
"with requests.Session() as session:\n",
"    for e in entities:\n",
"        # The API is addressed by the last path segment of the DBpedia URI.\n",
"        entity = e.rsplit(\"/\", 1)[-1]\n",
"        r = session.get(KGVEC2GO_URL + entity, timeout=REQUEST_TIMEOUT)\n",
"        try:\n",
"            x = json.loads(r.text)\n",
"        except ValueError:\n",
"            # A body that is not valid JSON is handled like a missing entity.\n",
"            x = None\n",
"        # Catch case that an entity is not found in the API\n",
"        if isinstance(x, dict) and 'vector' in x:\n",
"            vectors.append(x['vector'])\n",
"        else:\n",
"            vectors.append(np.zeros(VECTOR_DIM))\n",
"            not_found = not_found + 1\n",
"\n",
"# f-string instead of str(...): a later cell may rebind the name `str`.\n",
"print(f\"{not_found} entities not found\")\n",
"dfXvectors = pd.DataFrame.from_records(vectors)"
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Movie012345678...190191192193194195196197198199
0http://dbpedia.org/resource/Playhouse_90-0.137760-0.589059-0.037958-0.534860-0.4136750.4899860.8348270.483092-0.237307...-0.7463740.178704-0.3009271.041487-0.1146650.074592-0.0486730.689411-0.3758390.637875
1http://dbpedia.org/resource/Looney_Tunes-1.042180-0.4267870.223503-0.041168-0.4137340.1035940.745793-0.070818-0.486270...0.041576-0.289932-0.0113910.7440240.575132-0.176852-0.2891610.196203-1.1270210.352837
2http://dbpedia.org/resource/Merrie_Melodies-0.735772-0.306961-0.0057320.092917-0.477233-0.0229620.886615-0.086885-0.621619...0.165683-0.267800-0.2121830.8356900.5368590.230216-0.4523820.141061-1.0832070.298553
3http://dbpedia.org/resource/American_Playhouse-0.218308-0.3831180.6980630.291182-0.1293090.2272230.1601460.404174-0.492890...0.327066-0.3082090.1565680.2297400.084384-0.2695710.1282570.5005170.0306790.423012
4http://dbpedia.org/resource/Fantasia_(1940_film)0.012596-0.363675-0.200341-0.288007-0.9182490.103618-0.0337920.1843930.351371...-0.8452040.2327620.1356840.178445-0.213599-0.1510710.185861-0.250488-0.5606390.046183
5http://dbpedia.org/resource/Snow_White_and_the...-0.110619-0.022438-0.4365840.132460-0.9681460.210315-0.113157-0.333683-0.110539...-0.721070-0.044716-0.1403880.7409210.212634-0.4322970.4831200.281024-0.7830870.206580
6http://dbpedia.org/resource/Band_of_Brothers_(...-0.301021-0.601196-0.667036-0.477340-0.5210350.2385520.4454940.480574-0.397435...0.194413-0.0416890.322213-0.010924-0.0495400.073796-0.251234-0.478446-0.8187310.169260
7http://dbpedia.org/resource/Pinocchio_(1940_film)0.058439-0.564881-0.317060-0.099454-0.334787-0.0126350.1996920.5487220.033585...-0.3360760.0843920.3695340.6918760.038075-0.2104860.173873-0.005578-0.7965970.358472
8http://dbpedia.org/resource/Harry_Potter_(film...-0.138585-1.1587170.065944-0.110256-1.0142480.0952000.2086440.7833790.462843...-0.144742-0.565039-0.1182080.6906980.125068-0.759913-0.156163-0.250222-0.0470050.022732
9http://dbpedia.org/resource/Red_Dwarf0.131227-0.7951820.650905-1.366069-0.648056-0.0677800.0271300.2168610.528318...-0.493793-0.5400190.1724660.1080050.014064-0.350497-0.185962-0.084622-0.049915-0.043591
10http://dbpedia.org/resource/Gimme_a_Break!-0.426197-0.643072-0.1436840.021588-0.534515-0.5419240.903420-0.0276600.516001...-0.3059900.2792920.0805570.3457030.4337650.0371071.373668-0.186867-0.3588730.585094
11http://dbpedia.org/resource/Last_of_the_Summer...-0.387616-0.8496540.520858-1.152769-0.395122-0.0788610.1342760.767099-0.086088...0.074024-0.180376-0.568238-0.009507-0.7062410.183831-0.243164-0.830150-0.0992410.196781
12http://dbpedia.org/resource/The_Lion_King-0.185161-0.6677580.351287-0.071545-0.7152350.6549800.9425130.299595-0.796929...-0.103406-0.306330-0.0345030.2777800.518103-0.6051220.536551-0.022816-0.4289240.105254
13http://dbpedia.org/resource/The_Philco_Televis...-0.427943-0.492605-0.227726-0.551582-0.155778-0.0702010.7490410.6924730.142140...-0.951145-0.113896-0.2341530.714249-0.2559020.0194030.571655-0.1556270.0627350.238491
14http://dbpedia.org/resource/Sleeping_Beauty_(1...-0.594262-0.284907-0.208800-0.275717-0.7983830.5036910.1536420.466465-0.296610...-0.5424770.1053570.0775650.9307460.081768-0.2181560.095910-0.159626-0.5584230.424275
15http://dbpedia.org/resource/The_Bold_Ones:_The...-0.275917-0.4906520.058167-0.349791-0.6160280.1178020.3159080.584220-0.168284...-0.5896130.179460-0.0829520.514616-0.1245560.3268070.1900470.1405600.3245200.007021
16http://dbpedia.org/resource/Dumbo-0.090244-0.497215-0.320145-0.082811-0.8044570.0490520.170885-0.0626570.201343...-0.5412160.2351220.1322510.4393900.147823-0.2288220.4375670.228110-0.5780950.010694
17http://dbpedia.org/resource/Frozen_(2013_film)-0.632902-1.110231-0.051083-0.036203-0.6184830.6443210.5208050.568159-0.492555...-0.619857-0.0958980.736908-0.0294180.117501-0.3164090.3413460.652590-0.3818500.527155
18http://dbpedia.org/resource/Casino_Royale_(196...0.7730640.109179-0.051000-0.132526-0.7151550.0914280.640842-0.264343-0.260256...0.158924-0.7398660.4991080.398301-0.216687-0.3424710.326373-0.205443-0.1028620.162528
19http://dbpedia.org/resource/The_Pacific_(minis...0.069477-0.758959-0.425278-0.280973-0.8937120.1489440.3986240.288982-0.399282...-0.178115-0.176143-0.460993-0.310192-0.304817-0.0023300.326521-0.118486-0.749635-0.003137
20http://dbpedia.org/resource/Star_Wars_(film)-0.504468-0.495121-1.276071-0.247087-0.5103090.0430880.9046670.165914-0.123610...-0.089242-0.0659290.2803220.7072750.042698-0.2264980.118022-0.405879-0.3227030.222759
21http://dbpedia.org/resource/The_Dick_Powell_Show0.3480110.107263-0.0634370.058328-0.0913510.0223630.2197580.7670880.308749...0.0260860.0051910.2430430.5379950.2507070.2085930.2379560.294168-0.0620480.157651
22http://dbpedia.org/resource/Beauty_and_the_Bea...-0.188234-0.4916750.664066-0.080035-0.6514260.3731240.1296130.297080-0.431981...-0.452267-0.3570040.3463600.0432670.086430-0.4565170.2350750.006192-0.2329160.730093
23http://dbpedia.org/resource/Aladdin_(1992_Disn...-0.485015-0.3215460.368292-0.076003-0.8250970.2389730.1730730.299280-0.567592...-0.634339-0.6746950.2044780.102706-0.259019-0.3507330.1333050.3262420.3841480.450761
24http://dbpedia.org/resource/Cinderella_(1950_f...-0.136178-0.4272340.593715-0.105923-0.3910590.3394420.0647060.549290-0.405299...-0.776572-0.4266980.0459250.0318470.034778-0.4387050.449561-0.188039-0.2605600.615081
\n", "

25 rows × 201 columns

\n", "
" ], "text/plain": [ " Movie 0 1 \\\n", "0 http://dbpedia.org/resource/Playhouse_90 -0.137760 -0.589059 \n", "1 http://dbpedia.org/resource/Looney_Tunes -1.042180 -0.426787 \n", "2 http://dbpedia.org/resource/Merrie_Melodies -0.735772 -0.306961 \n", "3 http://dbpedia.org/resource/American_Playhouse -0.218308 -0.383118 \n", "4 http://dbpedia.org/resource/Fantasia_(1940_film) 0.012596 -0.363675 \n", "5 http://dbpedia.org/resource/Snow_White_and_the... -0.110619 -0.022438 \n", "6 http://dbpedia.org/resource/Band_of_Brothers_(... -0.301021 -0.601196 \n", "7 http://dbpedia.org/resource/Pinocchio_(1940_film) 0.058439 -0.564881 \n", "8 http://dbpedia.org/resource/Harry_Potter_(film... -0.138585 -1.158717 \n", "9 http://dbpedia.org/resource/Red_Dwarf 0.131227 -0.795182 \n", "10 http://dbpedia.org/resource/Gimme_a_Break! -0.426197 -0.643072 \n", "11 http://dbpedia.org/resource/Last_of_the_Summer... -0.387616 -0.849654 \n", "12 http://dbpedia.org/resource/The_Lion_King -0.185161 -0.667758 \n", "13 http://dbpedia.org/resource/The_Philco_Televis... -0.427943 -0.492605 \n", "14 http://dbpedia.org/resource/Sleeping_Beauty_(1... -0.594262 -0.284907 \n", "15 http://dbpedia.org/resource/The_Bold_Ones:_The... -0.275917 -0.490652 \n", "16 http://dbpedia.org/resource/Dumbo -0.090244 -0.497215 \n", "17 http://dbpedia.org/resource/Frozen_(2013_film) -0.632902 -1.110231 \n", "18 http://dbpedia.org/resource/Casino_Royale_(196... 0.773064 0.109179 \n", "19 http://dbpedia.org/resource/The_Pacific_(minis... 0.069477 -0.758959 \n", "20 http://dbpedia.org/resource/Star_Wars_(film) -0.504468 -0.495121 \n", "21 http://dbpedia.org/resource/The_Dick_Powell_Show 0.348011 0.107263 \n", "22 http://dbpedia.org/resource/Beauty_and_the_Bea... -0.188234 -0.491675 \n", "23 http://dbpedia.org/resource/Aladdin_(1992_Disn... -0.485015 -0.321546 \n", "24 http://dbpedia.org/resource/Cinderella_(1950_f... -0.136178 -0.427234 \n", "\n", " 2 3 4 5 6 7 8 ... 
\\\n", "0 -0.037958 -0.534860 -0.413675 0.489986 0.834827 0.483092 -0.237307 ... \n", "1 0.223503 -0.041168 -0.413734 0.103594 0.745793 -0.070818 -0.486270 ... \n", "2 -0.005732 0.092917 -0.477233 -0.022962 0.886615 -0.086885 -0.621619 ... \n", "3 0.698063 0.291182 -0.129309 0.227223 0.160146 0.404174 -0.492890 ... \n", "4 -0.200341 -0.288007 -0.918249 0.103618 -0.033792 0.184393 0.351371 ... \n", "5 -0.436584 0.132460 -0.968146 0.210315 -0.113157 -0.333683 -0.110539 ... \n", "6 -0.667036 -0.477340 -0.521035 0.238552 0.445494 0.480574 -0.397435 ... \n", "7 -0.317060 -0.099454 -0.334787 -0.012635 0.199692 0.548722 0.033585 ... \n", "8 0.065944 -0.110256 -1.014248 0.095200 0.208644 0.783379 0.462843 ... \n", "9 0.650905 -1.366069 -0.648056 -0.067780 0.027130 0.216861 0.528318 ... \n", "10 -0.143684 0.021588 -0.534515 -0.541924 0.903420 -0.027660 0.516001 ... \n", "11 0.520858 -1.152769 -0.395122 -0.078861 0.134276 0.767099 -0.086088 ... \n", "12 0.351287 -0.071545 -0.715235 0.654980 0.942513 0.299595 -0.796929 ... \n", "13 -0.227726 -0.551582 -0.155778 -0.070201 0.749041 0.692473 0.142140 ... \n", "14 -0.208800 -0.275717 -0.798383 0.503691 0.153642 0.466465 -0.296610 ... \n", "15 0.058167 -0.349791 -0.616028 0.117802 0.315908 0.584220 -0.168284 ... \n", "16 -0.320145 -0.082811 -0.804457 0.049052 0.170885 -0.062657 0.201343 ... \n", "17 -0.051083 -0.036203 -0.618483 0.644321 0.520805 0.568159 -0.492555 ... \n", "18 -0.051000 -0.132526 -0.715155 0.091428 0.640842 -0.264343 -0.260256 ... \n", "19 -0.425278 -0.280973 -0.893712 0.148944 0.398624 0.288982 -0.399282 ... \n", "20 -1.276071 -0.247087 -0.510309 0.043088 0.904667 0.165914 -0.123610 ... \n", "21 -0.063437 0.058328 -0.091351 0.022363 0.219758 0.767088 0.308749 ... \n", "22 0.664066 -0.080035 -0.651426 0.373124 0.129613 0.297080 -0.431981 ... \n", "23 0.368292 -0.076003 -0.825097 0.238973 0.173073 0.299280 -0.567592 ... \n", "24 0.593715 -0.105923 -0.391059 0.339442 0.064706 0.549290 -0.405299 ... 
\n", "\n", " 190 191 192 193 194 195 196 \\\n", "0 -0.746374 0.178704 -0.300927 1.041487 -0.114665 0.074592 -0.048673 \n", "1 0.041576 -0.289932 -0.011391 0.744024 0.575132 -0.176852 -0.289161 \n", "2 0.165683 -0.267800 -0.212183 0.835690 0.536859 0.230216 -0.452382 \n", "3 0.327066 -0.308209 0.156568 0.229740 0.084384 -0.269571 0.128257 \n", "4 -0.845204 0.232762 0.135684 0.178445 -0.213599 -0.151071 0.185861 \n", "5 -0.721070 -0.044716 -0.140388 0.740921 0.212634 -0.432297 0.483120 \n", "6 0.194413 -0.041689 0.322213 -0.010924 -0.049540 0.073796 -0.251234 \n", "7 -0.336076 0.084392 0.369534 0.691876 0.038075 -0.210486 0.173873 \n", "8 -0.144742 -0.565039 -0.118208 0.690698 0.125068 -0.759913 -0.156163 \n", "9 -0.493793 -0.540019 0.172466 0.108005 0.014064 -0.350497 -0.185962 \n", "10 -0.305990 0.279292 0.080557 0.345703 0.433765 0.037107 1.373668 \n", "11 0.074024 -0.180376 -0.568238 -0.009507 -0.706241 0.183831 -0.243164 \n", "12 -0.103406 -0.306330 -0.034503 0.277780 0.518103 -0.605122 0.536551 \n", "13 -0.951145 -0.113896 -0.234153 0.714249 -0.255902 0.019403 0.571655 \n", "14 -0.542477 0.105357 0.077565 0.930746 0.081768 -0.218156 0.095910 \n", "15 -0.589613 0.179460 -0.082952 0.514616 -0.124556 0.326807 0.190047 \n", "16 -0.541216 0.235122 0.132251 0.439390 0.147823 -0.228822 0.437567 \n", "17 -0.619857 -0.095898 0.736908 -0.029418 0.117501 -0.316409 0.341346 \n", "18 0.158924 -0.739866 0.499108 0.398301 -0.216687 -0.342471 0.326373 \n", "19 -0.178115 -0.176143 -0.460993 -0.310192 -0.304817 -0.002330 0.326521 \n", "20 -0.089242 -0.065929 0.280322 0.707275 0.042698 -0.226498 0.118022 \n", "21 0.026086 0.005191 0.243043 0.537995 0.250707 0.208593 0.237956 \n", "22 -0.452267 -0.357004 0.346360 0.043267 0.086430 -0.456517 0.235075 \n", "23 -0.634339 -0.674695 0.204478 0.102706 -0.259019 -0.350733 0.133305 \n", "24 -0.776572 -0.426698 0.045925 0.031847 0.034778 -0.438705 0.449561 \n", "\n", " 197 198 199 \n", "0 0.689411 -0.375839 0.637875 \n", "1 0.196203 
-1.127021 0.352837 \n", "2 0.141061 -1.083207 0.298553 \n", "3 0.500517 0.030679 0.423012 \n", "4 -0.250488 -0.560639 0.046183 \n", "5 0.281024 -0.783087 0.206580 \n", "6 -0.478446 -0.818731 0.169260 \n", "7 -0.005578 -0.796597 0.358472 \n", "8 -0.250222 -0.047005 0.022732 \n", "9 -0.084622 -0.049915 -0.043591 \n", "10 -0.186867 -0.358873 0.585094 \n", "11 -0.830150 -0.099241 0.196781 \n", "12 -0.022816 -0.428924 0.105254 \n", "13 -0.155627 0.062735 0.238491 \n", "14 -0.159626 -0.558423 0.424275 \n", "15 0.140560 0.324520 0.007021 \n", "16 0.228110 -0.578095 0.010694 \n", "17 0.652590 -0.381850 0.527155 \n", "18 -0.205443 -0.102862 0.162528 \n", "19 -0.118486 -0.749635 -0.003137 \n", "20 -0.405879 -0.322703 0.222759 \n", "21 0.294168 -0.062048 0.157651 \n", "22 0.006192 -0.232916 0.730093 \n", "23 0.326242 0.384148 0.450761 \n", "24 -0.188039 -0.260560 0.615081 \n", "\n", "[25 rows x 201 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Place each movie URI next to its vector columns; join=\"inner\" keeps only\n",
"# the row labels that exist in both frames.\n",
"dfXcomplete = pd.concat(\n",
"    [dfX, dfXvectors],\n",
"    axis=1,\n",
"    join=\"inner\",\n",
")\n",
"dfXcomplete.head(25)"
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 -0.230488\n", "1 -0.439156\n", "2 0.197780\n", "3 -0.135158\n", "4 -0.728127\n", " ... 
\n", "195 -0.274856\n", "196 0.331307\n", "197 0.062966\n", "198 -0.103116\n", "199 0.321939\n", "Length: 200, dtype: float64\n", "http://dbpedia.org/resource/Cinderella_(1950_film)\n", "http://dbpedia.org/resource/Aladdin_(1992_Disney_film)\n", "http://dbpedia.org/resource/Bambi\n", "http://dbpedia.org/resource/Beauty_and_the_Beast_(1991_film)\n", "http://dbpedia.org/resource/Tangled\n", "http://dbpedia.org/resource/Peter_Pan_(1953_film)\n", "http://dbpedia.org/resource/The_Three_Caballeros\n", "http://dbpedia.org/resource/The_Little_Mermaid_(1989_film)\n", "http://dbpedia.org/resource/Beauty_and_the_Beast_(2017_film)\n", "http://dbpedia.org/resource/Saludos_Amigos\n" ] } ], "source": [
"from sklearn.neighbors import NearestNeighbors\n",
"\n",
"# Cosine-distance neighbour index over all movie vectors.\n",
"knn = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='cosine')\n",
"knn.fit(dfXvectors.to_numpy())\n",
"\n",
"user1 = [\"http://dbpedia.org/resource/The_Matrix\",\"http://dbpedia.org/resource/Interstellar_(film)\",\"http://dbpedia.org/resource/Blade_Runner\"]\n",
"user2 = [\"http://dbpedia.org/resource/Bambi\",\"http://dbpedia.org/resource/Aladdin_(1992_Disney_film)\",\"http://dbpedia.org/resource/Cinderella_(1950_film)\"]\n",
"\n",
"def get_recommendations(user_profile, df_vectors, n_neighbors=10, verbose=True):\n",
"    \"\"\"Return the nearest neighbours of a user's averaged movie vector.\n",
"\n",
"    The user vector is the column-wise mean of the rows of df_vectors whose\n",
"    movie, looked up in the notebook-level dfX['Movie'], is listed in\n",
"    user_profile. Neighbours are searched with the notebook-level knn index.\n",
"\n",
"    Args:\n",
"        user_profile: Collection of movie URIs as they appear in dfX['Movie'].\n",
"        df_vectors: DataFrame of vectors whose index lines up with dfX.\n",
"        n_neighbors: Number of neighbours to return (default 10).\n",
"        verbose: Print the averaged user vector when true (default).\n",
"\n",
"    Returns:\n",
"        The index array produced by knn.kneighbors for the single query\n",
"        vector; its first row lists the neighbour row positions. Movies of\n",
"        the profile itself are not filtered out.\n",
"\n",
"    Raises:\n",
"        ValueError: If no movie of user_profile occurs in dfX['Movie'].\n",
"    \"\"\"\n",
"    in_profile = dfX['Movie'].isin(user_profile)\n",
"    if not in_profile.any():\n",
"        # The mean of zero rows is all-NaN, which kneighbors rejects with an\n",
"        # unrelated-looking message; fail early with a clear one instead.\n",
"        raise ValueError(\"none of the movies in user_profile occur in dfX['Movie']\")\n",
"    df_uservector = df_vectors[in_profile].mean()\n",
"    if verbose:\n",
"        print(df_uservector)\n",
"    return knn.kneighbors([df_uservector], n_neighbors, return_distance=False)\n",
"\n",
"for index in get_recommendations(user2,dfXvectors)[0]:\n",
"    # kneighbors yields row positions, so the lookup is positional.\n",
"    print(dfX['Movie'].iloc[index])"
] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 1 2 3 4 5 6 \\\n", "55 0.318401 0.139367 -0.701737 -0.279444 -0.897473 0.299331 0.466589 \n", "59 -0.256155 -0.262864 -0.451567 0.016916 -1.072240 -0.461869 -0.136405 \n", "362 0.140722 0.256464 -0.289506 -0.067005 -0.788516 -0.310958 0.428743 \n", "719 0.011412 0.284625 -0.402758 0.161706 -0.642788 -0.145495 0.289870 \n", "413 
0.276396 -0.000320 -0.489742 0.338217 -0.533667 0.146200 0.010560 \n", "828 0.647876 0.425028 -0.850197 0.543665 -0.848872 0.073361 0.156451 \n", "986 0.442210 0.337279 -0.566876 0.210853 -0.584514 0.171120 0.322006 \n", "283 -0.426962 -0.150118 -0.368088 0.185758 -0.907161 -0.417741 -0.040905 \n", "473 0.322986 0.083523 -0.458807 0.396477 -0.595785 0.204056 0.398959 \n", "209 -0.013298 0.115373 -0.373912 0.026581 -0.619941 -0.011785 0.265499 \n", "24 -0.136178 -0.427234 0.593715 -0.105923 -0.391059 0.339442 0.064706 \n", "23 -0.485015 -0.321546 0.368292 -0.076003 -0.825097 0.238973 0.173073 \n", "73 -0.070271 -0.568688 -0.368667 -0.223548 -0.968225 0.254485 0.300212 \n", "22 -0.188234 -0.491675 0.664066 -0.080035 -0.651426 0.373124 0.129613 \n", "79 -0.412844 -0.120966 0.301059 -0.398393 -0.912849 0.290974 0.357487 \n", "28 -0.534488 -0.845421 -0.352786 -0.010685 -0.768865 0.090177 -0.008820 \n", "100 0.019181 -0.419963 -0.257470 0.046528 -0.791790 0.003891 0.234078 \n", "27 -0.566510 0.098564 0.704964 -0.300415 -0.802405 -0.135183 -0.414215 \n", "480 0.156848 -0.703100 0.132047 0.089070 -0.618986 0.162045 0.177390 \n", "251 -0.377803 -0.470877 0.000738 0.019628 -0.452400 0.438161 -0.055662 \n", "\n", " 7 8 9 ... 190 191 192 \\\n", "55 0.063447 -0.631822 -0.277138 ... -0.176155 -0.744473 -0.330266 \n", "59 0.559580 0.262314 0.568922 ... 0.052474 -0.300920 0.326962 \n", "362 0.183505 0.100805 0.693227 ... -0.072536 -0.380478 0.196136 \n", "719 0.299873 -0.330871 0.628309 ... -0.208002 -0.316287 0.055104 \n", "413 0.314999 -0.064188 0.285488 ... 0.265778 -0.038251 -0.096124 \n", "828 0.250268 0.447005 -0.032032 ... -0.206418 0.091566 0.171927 \n", "986 0.062472 -0.071037 0.296194 ... -0.364699 -0.091305 0.061686 \n", "283 -0.090521 -0.153316 0.268322 ... -0.266540 -0.365087 0.009403 \n", "473 0.758679 -0.111542 -0.099624 ... -0.460300 0.174754 0.546240 \n", "209 0.155927 -0.212716 0.207713 ... -0.178869 -0.107379 -0.075707 \n", "24 0.549290 -0.405299 0.323797 ... 
-0.776572 -0.426698 0.045925 \n", "23 0.299280 -0.567592 0.222896 ... -0.634339 -0.674695 0.204478 \n", "73 0.052589 -0.029153 0.247146 ... -0.594128 -0.133593 0.039135 \n", "22 0.297080 -0.431981 0.024356 ... -0.452267 -0.357004 0.346360 \n", "79 -0.373975 -0.735043 0.191279 ... -0.470608 -0.775667 0.453187 \n", "28 0.141059 0.042978 0.301992 ... -0.723882 -0.407864 0.334190 \n", "100 0.513376 -0.350695 -0.039778 ... -0.065682 0.462035 0.110817 \n", "27 0.170616 -0.434117 0.171072 ... -0.968996 -0.331415 -0.259925 \n", "480 -0.120469 -0.291758 0.419111 ... -0.427550 -0.549338 0.302811 \n", "251 0.521283 -0.131151 -0.294863 ... -0.703387 -0.138630 0.206960 \n", "\n", " 193 194 195 196 197 198 199 \n", "55 0.049837 0.209204 0.004850 -0.342679 -0.444115 -0.132099 -0.176516 \n", "59 0.671042 0.403135 -0.007217 0.093031 -0.461279 -0.175980 -0.048493 \n", "362 0.741731 -0.111106 0.415001 0.382035 -0.388946 -0.468983 0.506479 \n", "719 0.648155 -0.426689 0.325136 -0.040030 -0.278267 0.202169 0.406792 \n", "413 0.239046 -0.383956 -0.292289 -0.253236 -0.492697 -0.206753 -0.161367 \n", "828 0.484794 -0.436635 0.042103 0.073556 -0.015005 -0.893553 0.354957 \n", "986 0.498790 0.065654 0.063530 0.546541 -0.054107 -0.741318 0.142517 \n", "283 0.502563 0.363444 -0.033896 0.385330 -0.580187 -0.047346 0.336268 \n", "473 0.692509 0.051536 0.205738 0.086837 -0.353917 -0.586069 0.198249 \n", "209 0.552940 0.121073 0.056891 -0.245650 0.146369 -0.429660 0.507857 \n", "24 0.031847 0.034778 -0.438705 0.449561 -0.188039 -0.260560 0.615081 \n", "23 0.102706 -0.259019 -0.350733 0.133305 0.326242 0.384148 0.450761 \n", "73 0.793185 0.186650 -0.035128 0.411055 0.050694 -0.432934 -0.100027 \n", "22 0.043267 0.086430 -0.456517 0.235075 0.006192 -0.232916 0.730093 \n", "79 0.480819 -0.315200 -0.259552 -0.207007 0.101626 -0.427244 0.565416 \n", "28 0.799243 -0.052781 -0.720815 0.528965 0.235538 -0.320678 -0.237203 \n", "100 0.392284 -0.060565 -0.551313 0.285942 -0.047484 -0.520079 0.261523 \n", 
"27 0.289119 -0.149168 -0.367965 0.236865 0.200093 -0.042937 0.716321 \n", "480 0.121103 -0.189923 -0.379302 -0.177058 -0.413966 -0.194548 0.269128 \n", "251 0.596162 0.093474 -0.143856 0.398926 0.555437 -0.403876 0.016331 \n", "\n", "[20 rows x 200 columns]\n" ] } ], "source": [
"# Neighbour row positions for each of the two user profiles.\n",
"arr1 = get_recommendations(user1,dfXvectors)\n",
"arr2 = get_recommendations(user2,dfXvectors)\n",
"# np.append flattens both results into one 1-D array: user1's neighbours first, then user2's.\n",
"arr = np.append(arr1,arr2)\n",
"# Use a dedicated name here: `df` holds the movie table read from 1kmovies.csv\n",
"# and must not be overwritten by this selection.\n",
"recommended_vectors = dfXvectors.iloc[arr]\n",
"print(recommended_vectors)"
] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
"from sklearn.decomposition import PCA\n",
"import matplotlib.pyplot as plt\n",
"import random as rd\n",
"\n",
"# Seed for the vertical label jitter so that re-running the cell reproduces the figure.\n",
"JITTER_SEED = 42\n",
"\n",
"# Project all movie vectors onto their first two principal components.\n",
"pca = PCA(n_components=2)\n",
"pca_result = pca.fit_transform(dfXvectors)\n",
"principalDf = pd.DataFrame(data = pca_result\n",
"             , columns = ['principal component 1', 'principal component 2'])\n",
"\n",
"# Rows of the recommended movies; arr can list a movie twice when both users get it.\n",
"finalDf = principalDf.iloc[arr]\n",
"fig = plt.figure(figsize = (8,8))\n",
"ax = fig.add_subplot(1,1,1)\n",
"ax.set_xlim([-2.0,0.75])\n",
"ax.set_xlabel('Principal Component 1')\n",
"ax.set_ylabel('Principal Component 2')\n",
"ax.scatter(finalDf['principal component 1']\n",
"           , finalDf['principal component 2'], c='b')\n",
"\n",
"def movie_label(uri):\n",
"    \"\"\"Shorten a movie URI to its last path segment, cut at the first '(', with underscores as spaces.\"\"\"\n",
"    if uri.find(\"(\")>0:\n",
"        return uri[uri.rindex(\"/\")+1:uri.index(\"(\")].replace(\"_\",\" \")\n",
"    return uri[uri.rindex(\"/\")+1:].replace(\"_\",\" \")\n",
"\n",
"jitter = rd.Random(JITTER_SEED)\n",
"for i in arr:\n",
"    # Read the coordinates from principalDf, whose index is unique: finalDf.at[i, ...]\n",
"    # yields a Series instead of a number when i occurs more than once in arr.\n",
"    point_x = principalDf.at[i,'principal component 1']\n",
"    point_y = principalDf.at[i,'principal component 2']\n",
"    # Shift each label vertically by a random offset in [-0.1, 0.1) to reduce overlap.\n",
"    # The text is kept in `movie_text`; binding it to `str` would shadow the builtin.\n",
"    movie_text = movie_label(dfX.at[i,'Movie'])\n",
"    ax.annotate(movie_text, (point_x, point_y+0.2*jitter.random()-0.1))\n",
"\n",
"# add users as red dots\n",
"def add_user(user_profile,plt,label):\n",
"    \"\"\"Draw the mean PCA position of a user's movies as a red dot with a red text label.\n",
"\n",
"    Args:\n",
"        user_profile: Movie URIs as they appear in dfX['Movie'].\n",
"        plt: Object whose plot() draws the dot; the calls in this cell pass matplotlib.pyplot.\n",
"        label: Text written next to the dot.\n",
"\n",
"    The text label is drawn on the notebook-level axes `ax`.\n",
"    \"\"\"\n",
"    df_uservector = principalDf[dfX['Movie'].isin(user_profile)].mean()\n",
"    plt.plot(df_uservector['principal component 1'], df_uservector['principal component 2'], 'ro')\n",
"    ax.annotate(label,(df_uservector['principal component 1'], df_uservector['principal component 2']),c='r')\n",
"\n",
"add_user(user1,plt,\"User 1\")\n",
"add_user(user2,plt,\"User 2\")\n",
"\n",
"ax.grid()\n",
"plt.show()"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 2 }