{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "9075844d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BandGenre
0http://dbpedia.org/resource/Vintage_BlueRock
1http://dbpedia.org/resource/Tin_AlleyRock
2http://dbpedia.org/resource/The_RoulettesRock
3http://dbpedia.org/resource/BuhosRock
4http://dbpedia.org/resource/Ripe_Banana_SkinsRock
.........
195http://dbpedia.org/resource/Carl_CarltonSoul
196http://dbpedia.org/resource/Salsoul_OrchestraSoul
197http://dbpedia.org/resource/The_NextmenSoul
198http://dbpedia.org/resource/The_Jackson_SistersSoul
199http://dbpedia.org/resource/Orelha_NegraSoul
\n", "

200 rows × 2 columns

\n", "
" ], "text/plain": [ " Band Genre\n", "0 http://dbpedia.org/resource/Vintage_Blue Rock\n", "1 http://dbpedia.org/resource/Tin_Alley Rock\n", "2 http://dbpedia.org/resource/The_Roulettes Rock\n", "3 http://dbpedia.org/resource/Buhos Rock\n", "4 http://dbpedia.org/resource/Ripe_Banana_Skins Rock\n", ".. ... ...\n", "195 http://dbpedia.org/resource/Carl_Carlton Soul\n", "196 http://dbpedia.org/resource/Salsoul_Orchestra Soul\n", "197 http://dbpedia.org/resource/The_Nextmen Soul\n", "198 http://dbpedia.org/resource/The_Jackson_Sisters Soul\n", "199 http://dbpedia.org/resource/Orelha_Negra Soul\n", "\n", "[200 rows x 2 columns]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Read the tab-separated band/genre ground truth.\n",
 "import pandas as pd\n",
 "labels_file_path_str: str = \"./bands_labels.csv\"\n",
 "labels_df = pd.read_csv(labels_file_path_str, delimiter=\"\\t\")\n",
 "# Single-column frames: the entity URIs (dfX) and their genre labels (dfY).\n",
 "dfX, dfY = labels_df[[\"Band\"]], labels_df[[\"Genre\"]]\n",
 "labels_df"
 ] }, { "cell_type": "code", "execution_count": 2, "id": "d0a64c2f", "metadata": {}, "outputs": [], "source": [
 "# Space-separated vector file; its first line is skipped.\n",
 "# NOTE(review): with pandas' default header inference the first remaining line\n",
 "# is consumed as column names (overwritten below), so it never becomes a data\n",
 "# row - confirm that this is intended for this file.\n",
 "embedding_columns = [f\"v{i}\" for i in range(0, 100)]\n",
 "dfVectors = pd.read_csv(\"./oa100.txt\", sep=\" \", skiprows=[0])\n",
 "dfVectors.columns = [\"Band\", *embedding_columns, \"v100\"]\n",
 "# v100 is the empty trailing value of every line; keep Band plus v0..v99.\n",
 "dfVectors = dfVectors[[\"Band\", *embedding_columns]]"
 ] }, { "cell_type": "code", "execution_count": 3, "id": "592a0d5a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
v0v1v2v3v4v5v6v7v8v9...v90v91v92v93v94v95v96v97v98v99
Band
http://dbpedia.org/resource/Vintage_Blue-0.101661-0.1531320.227238-0.1041910.1694680.0973130.290867-0.172756-0.2148200.217480...-0.0046700.0938490.2310590.1221890.165482-0.122834-0.0588400.0761210.034616-0.276353
http://dbpedia.org/resource/Tin_Alley-0.083212-0.1949810.337864-0.1290910.1826300.1330890.364554-0.103778-0.1536160.314491...0.044746-0.0000480.2485490.2341270.186773-0.1244660.0653590.2049070.052136-0.221038
http://dbpedia.org/resource/The_Roulettes0.567702-0.2044871.514109-0.5000850.279755-0.4187951.104197-0.5334000.3570731.036889...-0.967095-0.1974161.0055670.7929351.090106-1.0297370.0778020.3590840.482350-0.829426
http://dbpedia.org/resource/Buhos-0.052553-0.1197920.331403-0.1183170.2488850.1117730.386889-0.134541-0.1374020.286090...0.049862-0.0186610.1836220.2974360.231518-0.1373090.1462910.1337070.006297-0.185696
http://dbpedia.org/resource/Ripe_Banana_Skins-0.052542-0.1270180.321062-0.1434780.2054480.1005990.367207-0.080136-0.1228460.210424...0.0141770.0158450.1829460.2591830.192614-0.1055720.0813040.1185930.019088-0.220585
..................................................................
http://dbpedia.org/resource/Carl_Carlton-0.059549-0.3321380.462567-0.4778670.4258340.4105360.9261060.041566-0.219600-0.090409...-0.2568820.1976300.7145950.6919110.401502-0.0617940.810864-0.0774210.0141730.097225
http://dbpedia.org/resource/Salsoul_Orchestra-0.054991-0.0758500.234934-0.268392-0.036323-0.2483650.8235460.1504420.6949650.445558...-0.954127-0.0299410.5324361.3085131.195635-0.3076820.2386141.0111020.107842-1.008286
http://dbpedia.org/resource/The_Nextmen0.163393-0.4769400.984449-0.887747-0.682041-0.4666491.1797780.6060940.0477630.193933...-1.6549980.1017191.7948940.429845-0.5183420.375744-0.2531640.1689150.2095500.038163
http://dbpedia.org/resource/The_Jackson_Sisters-0.121151-0.2180000.338082-0.1669150.2554040.1523930.449829-0.054923-0.1165480.212199...-0.0803440.0973200.3995610.3353780.260652-0.1506620.2506160.3190230.035124-0.265098
http://dbpedia.org/resource/Orelha_Negra-0.017703-0.0409500.050595-0.0315750.0382270.0123270.046358-0.072273-0.0453960.010551...-0.0105450.0354460.0273580.0273580.037674-0.014698-0.0008990.0108800.033274-0.069416
\n", "

200 rows × 100 columns

\n", "
" ], "text/plain": [ " v0 v1 v2 \\\n", "Band \n", "http://dbpedia.org/resource/Vintage_Blue -0.101661 -0.153132 0.227238 \n", "http://dbpedia.org/resource/Tin_Alley -0.083212 -0.194981 0.337864 \n", "http://dbpedia.org/resource/The_Roulettes 0.567702 -0.204487 1.514109 \n", "http://dbpedia.org/resource/Buhos -0.052553 -0.119792 0.331403 \n", "http://dbpedia.org/resource/Ripe_Banana_Skins -0.052542 -0.127018 0.321062 \n", "... ... ... ... \n", "http://dbpedia.org/resource/Carl_Carlton -0.059549 -0.332138 0.462567 \n", "http://dbpedia.org/resource/Salsoul_Orchestra -0.054991 -0.075850 0.234934 \n", "http://dbpedia.org/resource/The_Nextmen 0.163393 -0.476940 0.984449 \n", "http://dbpedia.org/resource/The_Jackson_Sisters -0.121151 -0.218000 0.338082 \n", "http://dbpedia.org/resource/Orelha_Negra -0.017703 -0.040950 0.050595 \n", "\n", " v3 v4 v5 \\\n", "Band \n", "http://dbpedia.org/resource/Vintage_Blue -0.104191 0.169468 0.097313 \n", "http://dbpedia.org/resource/Tin_Alley -0.129091 0.182630 0.133089 \n", "http://dbpedia.org/resource/The_Roulettes -0.500085 0.279755 -0.418795 \n", "http://dbpedia.org/resource/Buhos -0.118317 0.248885 0.111773 \n", "http://dbpedia.org/resource/Ripe_Banana_Skins -0.143478 0.205448 0.100599 \n", "... ... ... ... 
\n", "http://dbpedia.org/resource/Carl_Carlton -0.477867 0.425834 0.410536 \n", "http://dbpedia.org/resource/Salsoul_Orchestra -0.268392 -0.036323 -0.248365 \n", "http://dbpedia.org/resource/The_Nextmen -0.887747 -0.682041 -0.466649 \n", "http://dbpedia.org/resource/The_Jackson_Sisters -0.166915 0.255404 0.152393 \n", "http://dbpedia.org/resource/Orelha_Negra -0.031575 0.038227 0.012327 \n", "\n", " v6 v7 v8 \\\n", "Band \n", "http://dbpedia.org/resource/Vintage_Blue 0.290867 -0.172756 -0.214820 \n", "http://dbpedia.org/resource/Tin_Alley 0.364554 -0.103778 -0.153616 \n", "http://dbpedia.org/resource/The_Roulettes 1.104197 -0.533400 0.357073 \n", "http://dbpedia.org/resource/Buhos 0.386889 -0.134541 -0.137402 \n", "http://dbpedia.org/resource/Ripe_Banana_Skins 0.367207 -0.080136 -0.122846 \n", "... ... ... ... \n", "http://dbpedia.org/resource/Carl_Carlton 0.926106 0.041566 -0.219600 \n", "http://dbpedia.org/resource/Salsoul_Orchestra 0.823546 0.150442 0.694965 \n", "http://dbpedia.org/resource/The_Nextmen 1.179778 0.606094 0.047763 \n", "http://dbpedia.org/resource/The_Jackson_Sisters 0.449829 -0.054923 -0.116548 \n", "http://dbpedia.org/resource/Orelha_Negra 0.046358 -0.072273 -0.045396 \n", "\n", " v9 ... v90 \\\n", "Band ... \n", "http://dbpedia.org/resource/Vintage_Blue 0.217480 ... -0.004670 \n", "http://dbpedia.org/resource/Tin_Alley 0.314491 ... 0.044746 \n", "http://dbpedia.org/resource/The_Roulettes 1.036889 ... -0.967095 \n", "http://dbpedia.org/resource/Buhos 0.286090 ... 0.049862 \n", "http://dbpedia.org/resource/Ripe_Banana_Skins 0.210424 ... 0.014177 \n", "... ... ... ... \n", "http://dbpedia.org/resource/Carl_Carlton -0.090409 ... -0.256882 \n", "http://dbpedia.org/resource/Salsoul_Orchestra 0.445558 ... -0.954127 \n", "http://dbpedia.org/resource/The_Nextmen 0.193933 ... -1.654998 \n", "http://dbpedia.org/resource/The_Jackson_Sisters 0.212199 ... -0.080344 \n", "http://dbpedia.org/resource/Orelha_Negra 0.010551 ... 
-0.010545 \n", "\n", " v91 v92 v93 \\\n", "Band \n", "http://dbpedia.org/resource/Vintage_Blue 0.093849 0.231059 0.122189 \n", "http://dbpedia.org/resource/Tin_Alley -0.000048 0.248549 0.234127 \n", "http://dbpedia.org/resource/The_Roulettes -0.197416 1.005567 0.792935 \n", "http://dbpedia.org/resource/Buhos -0.018661 0.183622 0.297436 \n", "http://dbpedia.org/resource/Ripe_Banana_Skins 0.015845 0.182946 0.259183 \n", "... ... ... ... \n", "http://dbpedia.org/resource/Carl_Carlton 0.197630 0.714595 0.691911 \n", "http://dbpedia.org/resource/Salsoul_Orchestra -0.029941 0.532436 1.308513 \n", "http://dbpedia.org/resource/The_Nextmen 0.101719 1.794894 0.429845 \n", "http://dbpedia.org/resource/The_Jackson_Sisters 0.097320 0.399561 0.335378 \n", "http://dbpedia.org/resource/Orelha_Negra 0.035446 0.027358 0.027358 \n", "\n", " v94 v95 v96 \\\n", "Band \n", "http://dbpedia.org/resource/Vintage_Blue 0.165482 -0.122834 -0.058840 \n", "http://dbpedia.org/resource/Tin_Alley 0.186773 -0.124466 0.065359 \n", "http://dbpedia.org/resource/The_Roulettes 1.090106 -1.029737 0.077802 \n", "http://dbpedia.org/resource/Buhos 0.231518 -0.137309 0.146291 \n", "http://dbpedia.org/resource/Ripe_Banana_Skins 0.192614 -0.105572 0.081304 \n", "... ... ... ... 
\n", "http://dbpedia.org/resource/Carl_Carlton 0.401502 -0.061794 0.810864 \n", "http://dbpedia.org/resource/Salsoul_Orchestra 1.195635 -0.307682 0.238614 \n", "http://dbpedia.org/resource/The_Nextmen -0.518342 0.375744 -0.253164 \n", "http://dbpedia.org/resource/The_Jackson_Sisters 0.260652 -0.150662 0.250616 \n", "http://dbpedia.org/resource/Orelha_Negra 0.037674 -0.014698 -0.000899 \n", "\n", " v97 v98 v99 \n", "Band \n", "http://dbpedia.org/resource/Vintage_Blue 0.076121 0.034616 -0.276353 \n", "http://dbpedia.org/resource/Tin_Alley 0.204907 0.052136 -0.221038 \n", "http://dbpedia.org/resource/The_Roulettes 0.359084 0.482350 -0.829426 \n", "http://dbpedia.org/resource/Buhos 0.133707 0.006297 -0.185696 \n", "http://dbpedia.org/resource/Ripe_Banana_Skins 0.118593 0.019088 -0.220585 \n", "... ... ... ... \n", "http://dbpedia.org/resource/Carl_Carlton -0.077421 0.014173 0.097225 \n", "http://dbpedia.org/resource/Salsoul_Orchestra 1.011102 0.107842 -1.008286 \n", "http://dbpedia.org/resource/The_Nextmen 0.168915 0.209550 0.038163 \n", "http://dbpedia.org/resource/The_Jackson_Sisters 0.319023 0.035124 -0.265098 \n", "http://dbpedia.org/resource/Orelha_Negra 0.010880 0.033274 -0.069416 \n", "\n", "[200 rows x 100 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Keep only the labelled bands that also have a vector; the result is indexed by band URI.\n",
 "dfXvectors = pd.concat([dfX.set_index(\"Band\"), dfVectors.set_index(\"Band\")], axis=1, join=\"inner\")\n",
 "dfXvectors"
 ] }, { "cell_type": "code", "execution_count": 7, "id": "8c240cf8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.65" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# evaluate in 10-fold CV\n",
 "from sklearn.neural_network import MLPClassifier\n",
 "from sklearn.model_selection import cross_val_score\n",
 "import numpy as np\n",
 "\n",
 "# Look the genre of every feature row up by its band URI. This keeps labels and\n",
 "# features aligned row by row and makes the cell independent of the global dfY,\n",
 "# which a later cell rebinds to a different label frame.\n",
 "genre_by_band = labels_df.set_index(\"Band\")[\"Genre\"]\n",
 "y_genre = genre_by_band.loc[dfXvectors.index].to_numpy()\n",
 "\n",
 "# Fixed seed so that the reported mean/std can be reproduced on a re-run.\n",
 "clf = MLPClassifier(max_iter=10000, random_state=42)\n",
 "scores = cross_val_score(clf, dfXvectors, y_genre, cv=10)\n",
 "scores.mean()"
 ] }, { "cell_type": 
"code", "execution_count": 8, "id": "67a1e184", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.07745966692414834" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "scores.std()"
 ] }, { "cell_type": "code", "execution_count": 10, "id": "124d8717", "metadata": {}, "outputs": [], "source": [
 "# Create new ground truth for visualization by classes\n",
 "# Targets: record labels, musical works, genres; every other matching subject gets \"-\"\n",
 "from rdflib import Graph, RDF, URIRef\n",
 "import re\n",
 "import pandas as pd\n",
 "import numpy as np\n",
 "\n",
 "g = Graph()\n",
 "g.parse('./artists_graph.nt')\n",
 "\n",
 "# Only DBpedia resource URIs made of ASCII letters, digits, '_' and parentheses are kept.\n",
 "regexp = re.compile('^http://dbpedia.org/resource/[0-9a-zA-Z_()]+$',re.ASCII)\n",
 "\n",
 "def matching_uris(nodes):\n",
 "    \"\"\"Return the set of URIs (as str) among `nodes` that match `regexp`.\"\"\"\n",
 "    return {uri for uri in (node.toPython() for node in nodes) if regexp.match(uri)}\n",
 "\n",
 "# (ontology class, label used in the plot)\n",
 "class_targets = [\n",
 "    (\"http://dbpedia.org/ontology/RecordLabel\", \"label\"),\n",
 "    (\"http://dbpedia.org/ontology/MusicalWork\", \"work\"),\n",
 "    (\"http://dbpedia.org/ontology/Genre\", \"genre\"),\n",
 "]\n",
 "\n",
 "# Parallel lists: lst_Y[i] is the class of lst_entities[i].\n",
 "lst_entities = []\n",
 "lst_Y = []\n",
 "\n",
 "for class_uri, class_name in class_targets:\n",
 "    for uri in matching_uris(g.subjects(RDF.type, URIRef(class_uri))):\n",
 "        lst_entities.append(uri)\n",
 "        lst_Y.append(class_name)\n",
 "\n",
 "# all other entities\n",
 "# Set lookup instead of scanning lst_entities once per subject of the graph.\n",
 "typed_entities = set(lst_entities)\n",
 "other_entities = matching_uris(g.subjects()) - typed_entities\n",
 "\n",
 "for uri in other_entities:\n",
 "    lst_entities.append(uri)\n",
 "    lst_Y.append(\"-\")\n",
 "\n",
 "# NOTE: this rebinds dfY. It was the genre label frame from the first cell;\n",
 "# from here on it holds one 'class' value per entry of lst_entities.\n",
 "dfY = pd.DataFrame(lst_Y, columns=['class'])"
 ] }, { "cell_type": "code", "execution_count": 12, "id": "ffe673c3", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# Create a visualization\n",
 "from sklearn.decomposition import PCA\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "dfVectorsOnly = dfVectors[[f\"v{i}\" for i in range(0,100)]]\n",
 "\n",
 "# Project the 100-dimensional vectors onto their first two principal components.\n",
 "pca = PCA(n_components=2)\n",
 "pca_result = pca.fit_transform(dfVectorsOnly)\n",
 "principalDf = pd.DataFrame(data = pca_result,\n",
 "                           columns = ['principal component 1', 'principal component 2'])\n",
 "\n",
 "# Attach the class of each point by entity URI. The PCA rows follow the order of\n",
 "# dfVectors while dfY follows the order of lst_entities, so concatenating the two\n",
 "# by position would colour the points with the classes of unrelated entities.\n",
 "# Entities without a class entry end up as NaN and are simply not plotted.\n",
 "class_by_entity = dict(zip(lst_entities, dfY['class']))\n",
 "entity_classes = dfVectors[\"Band\"].map(class_by_entity).rename('class').reset_index(drop=True)\n",
 "\n",
 "finalDf = pd.concat([principalDf, entity_classes], axis = 1)\n",
 "fig = plt.figure(figsize = (8,8))\n",
 "ax = fig.add_subplot(1,1,1)\n",
 "ax.set_xlabel('Principal Component 1', fontsize = 15)\n",
 "ax.set_ylabel('Principal Component 2', fontsize = 15)\n",
 "\n",
 "# One scatter series per class; entities labelled \"-\" are intentionally left out.\n",
 "targets = ['work','label', 'genre']\n",
 "colors = ['r', 'b', 'g']\n",
 "for target, color in zip(targets,colors):\n",
 "    indicesToKeep = finalDf['class'] == target\n",
 "    ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1'],\n",
 "               finalDf.loc[indicesToKeep, 'principal component 2'],\n",
 "               c = color)\n",
 "ax.legend(targets)\n",
 "ax.grid()"
 ] }, { "cell_type": "code", "execution_count": null, "id": "47ab6391", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }