-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataset_manager.py
More file actions
99 lines (82 loc) · 3.36 KB
/
dataset_manager.py
File metadata and controls
99 lines (82 loc) · 3.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# TODO: Split all song data into training, testing and validation sets.
# TODO: Add a function that generates labels and absolute file paths for all songs in a folder. It will produce a "metadata.txt" for the songs in the given folder.
# metadata.txt has the format absolute_file_path|label (use the label "None" for unlabeled songs / songs outside the genres folder structure, and the folder names as labels otherwise).
# These metadata files go into the respective folders under the names training.txt, testing.txt, validation.txt and predict_new.txt.
import os
from random import shuffle
import math
import json
from random import shuffle
import math
from sklearn.model_selection import train_test_split
import json
import config
import numpy as np
import pandas as pd
import os
import ast
def get_all_song_paths_and_labels(allSongPath):
    """Index every ``.au`` song found under the genre folders of *allSongPath*.

    Each immediate sub-directory of ``allSongPath`` is treated as one genre
    and is walked recursively. Three text files are (re)written, one entry
    per line:

    * ``config.GENRES_FILE`` -- the name of every genre sub-directory.
    * ``config.ALL_SONGS_PATHS`` -- the path of every ``.au`` file found.
    * ``config.ALL_SONGS_LABELS`` -- the genre of the song listed on the same
      line of ``config.ALL_SONGS_PATHS``.

    Args:
        allSongPath: Directory whose sub-directories are named after genres.
            Files that sit directly in this directory are ignored.

    Returns:
        None. The results are only written to the three files above.

    Raises:
        OSError: If ``allSongPath`` cannot be listed or an output file cannot
            be opened for writing.
    """
    # NOTE: os.listdir() gives no ordering guarantee, so the genre order in
    # GENRES_FILE follows whatever the file system returns.
    genres = [
        entry for entry in os.listdir(allSongPath)
        if os.path.isdir(os.path.join(allSongPath, entry))
    ]

    # Collect (song path, genre) pairs first so that the output files are
    # only held open for the short time needed to write them.
    song_list = []
    for genre in genres:
        song_folder = os.path.join(allSongPath, genre)
        for path, _dirs, files in os.walk(song_folder):
            for file_name in files:
                if file_name.endswith(".au"):
                    song_list.append((os.path.join(path, file_name), genre))

    # `with` guarantees the handles are flushed and closed even on error.
    with open(config.GENRES_FILE, "w") as genre_names:
        for genre in genres:
            genre_names.write(genre + "\n")

    with open(config.ALL_SONGS_PATHS, "w") as all_songs_path, \
            open(config.ALL_SONGS_LABELS, "w") as all_songs_label:
        for song_path, label in song_list:
            all_songs_path.write(song_path + "\n")
            all_songs_label.write(label + "\n")
def get_all_song_paths_and_labels_FMA():
    """Index the FMA ``.mp3`` songs whose top-level genre is supported.

    Reads the track metadata table from ``config.FMA_DATASET_CSV`` (two
    header rows, track id in the first column), keeps the tracks whose
    ``('set', 'subset')`` value is ``'small'`` and walks
    ``config.SONG_FLODER_FMA`` recursively. Every ``.mp3`` file whose track
    has a ``('track', 'genre_top')`` value present in the internal genre map
    is written, one entry per line, to:

    * ``config.ALL_SONGS_PATHS`` -- the path of the song.
    * ``config.ALL_SONGS_LABELS`` -- the mapped (lower-case) genre name of
      the song on the same line of ``config.ALL_SONGS_PATHS``.

    Songs that are missing from the filtered metadata, or whose genre is not
    in the genre map, are skipped.

    Returns:
        None. The results are only written to the two files above.

    Raises:
        ValueError: If the last six characters of an ``.mp3`` file's stem are
            not a number.
        OSError: If the CSV cannot be read or an output file cannot be
            opened for writing.
    """
    # FMA top-level genre name -> label used by the rest of the project.
    genre_map = {
        'Classical': 'classical',
        'Hip-Hop': 'hiphop',
        'Country': 'country',
        'Jazz': 'jazz',
        'Pop': 'pop',
        'Rock': 'rock',
        'Blues': 'blues',
    }

    tracks = pd.read_csv(config.FMA_DATASET_CSV, index_col=0, header=[0, 1])
    print(tracks.describe())

    # read_csv leaves the subset column as plain strings, so an ordering
    # comparison (<= 'small') is lexicographic and would also select
    # 'medium' and 'large'. Equality selects the small subset only.
    small = tracks['set', 'subset'] == 'small'
    tracks_dict = tracks.loc[small, ('track', 'genre_top')]
    print(tracks_dict)

    genre_songs = []
    for path, _dirs, files in os.walk(config.SONG_FLODER_FMA):
        for file_name in files:
            if not file_name.endswith(".mp3"):
                continue
            # The track id is taken from the last six characters before the
            # extension (e.g. "000002.mp3" -> 2).
            stem = os.path.splitext(file_name)[0]
            song_id = int(stem[-6:])
            print(song_id)
            # Series.get_value() was removed in pandas 1.0; .get() is the
            # label lookup that returns None for ids outside the subset.
            genre = tracks_dict.get(song_id)
            if genre in genre_map:
                song_path = os.path.join(path, file_name)
                genre_songs.append((song_path, genre_map[genre]))

    # `with` guarantees both files are flushed and closed; the original
    # never closed them.
    with open(config.ALL_SONGS_PATHS, "w") as all_songs_path, \
            open(config.ALL_SONGS_LABELS, "w") as all_songs_label:
        for song_path, label in genre_songs:
            all_songs_path.write(song_path + "\n")
            all_songs_label.write(label + "\n")