// ==UserScript==
// @name MusicBrainz: Archive.org importer
// @namespace http://www.jens-bertram.net/userscripts/import-internetarchive
// @description Import audio files and collections into Musicbrainz. Also supports scanning bookmarks and search results for MusicBrainz relations.
// @icon http://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Internet_Archive_logo_and_wordmark.png/240px-Internet_Archive_logo_and_wordmark.png
// @supportURL https://github.com/JensBee/userscripts
// @license MIT
// @version 0.4.7beta
//
// @grant none
// @require https://code.jquery.com/jquery-2.1.1.min.js
// @require https://greasyfork.org/scripts/5140-musicbrainz-function-library/code/MusicBrainz%20function%20library.js?version=21997
//
// @include *://archive.org/details/*
// @include *://archive.org/bookmarks.php
// @include *//archive.org/search.php*
// ==/UserScript==
var mbz = mbz || {};
mbz.archive_org_importer = {
// https://archive.org/about/faqs.php#Audio
audioFormats: [
'mp3',
'flac',
'ogg',
'audio',
'aiff',
'shorten',
'weba'],
/**
* Check file type for audio format. Filters out most (but not all) other
* file types.
* @formatStr file format name
*/
isAudioFile: function(formatStr) {
formatStr = formatStr.toLowerCase();
for (var format in this.audioFormats) {
if (formatStr.contains(format)) {
return true;
}
}
return false;
}
};
/**
* Functions to parse a list of links for MusicBrainz relations.
*/
mbz.archive_org_importer.linkCheck = {
btn: MBZ.Html.getMbzButton('Check link relations',
'Check entries being linked from MusicBrainz.'),
/**
* Link scanner status values
*/
links: {
found: null,
checked: 0,
matched: 0
},
/**
* RexEx to strip off current base-url.
*/
re: new RegExp('^'+window.location.origin),
/**
* Scan status elements.
*/
status: {
base: $('<span>'),
current: $('<span>'),
matched: $('<span>')
},
/**
* Start scanner.
* @params[links] jQuery object with target links
* @params[controlEl] jQuery element to append controls to
*/
scan: function(params) {
this.links.found = params.links;
if (this.links.found.length > 0 && params.controlEl) {
var self = this;
this.status.current.text(this.links.checked);
this.status.matched.text(this.links.matched);
this.status.base.append(' Checked: ')
.append(this.status.current)
.append(' Matches: ')
.append(this.status.matched)
.hide();
this.btn.click(function () {
self.btn.prop("disabled", true);
self.btn.text("Checking..");
self.status.base.show();
var urls = [];
$.each(self.links.found, function(idx, link) {
urls.push('http://archive.org'+$(link).attr('href'));
});
MBZ.Release.getUrlRelations({
urls: MBZ.Util.expandProtocols(urls),
cb: self.rel.attach,
cbInc: self.rel.inc,
cbDone: self.rel.done,
scope: self
});
});
params.controlEl.append(this.btn).append(this.status.base);
}
},
/**
* Callback handlers for relation parsing.
*/
rel: {
/**
* Relation was found, data is attached.
*/
attach: function(data) {
if (!data._res) {
return;
}
var res = data._res.replace(this.re, '');
var self = this;
$.each(self.links.found, function(idx, link) {
var link = $(link);
if (link.attr('href') == res) {
self.status.matched.text(self.links.matched++);
MBZ.Release.insertMBLink(data, link);
}
});
},
/**
* All relations have been resolved.
*/
done: function() {
this.status.base.html(' ' + this.links.checked
+ ' links checked with ' + this.links.matched + ' matches.');
this.btn.text('Check done');
},
/**
* A relation was checked.
*/
inc: function() {
this.status.current.text(this.links.checked++);
},
}
}
/**
* Functions to import a single release.
*/
mbz.archive_org_importer.Release = function() {
this.btn = MBZ.Html.getMbzButton('Import',
'Import this release to MusicBrainz');
this.dEl = $('<div id="mbzDialog">').hide(); // dialog elements
this.mbLinkTarget = null;
this.importRunning = false;
this.importInitialized = false;
// release data object finally passed on to MusicBrainz.
this.release = new MBZ.Release();
this.tracks = new mbz.archive_org_importer.Release.Tracks();
var self = this;
var submitted = false;
/**
* Initialize release parsing.
*/
function init() {
this.tracks.detectSources();
var playerJSON = this.tracks.getPlayerJSON();
if (playerJSON.length == 0) {
console.error('Player JSON data not found. Disabling MusicBrainz import.');
return;
}
var cEl = $('<div id="mbzControls">'); // control elements
var url = MBZ.Util.rmTrSlash($(location).attr('href'));
var urlJSON = url + '&output=json';
var trackData = $.parseJSON(playerJSON);
var pageJSON = null; // page data as JSON object
this.btn.click(function () {
if (submitted) {
self.release.submitRelease();
return;
}
if (!self.importInitialized) {
self.btn.prop("disabled", true);
self.btn.text("Initializing import");
// prepare source data
$.getJSON(urlJSON, function (data) {
pageJSON = data;
self.tracks.parseSources.call(self, data);
}).fail(function(jqxhr, textStatus, error) {
var err = textStatus + ', ' + error;
console.error("Request (" + urlJSON + ") failed: " + err);
self.btn.text("ERROR");
});
return;
}
self.dEl.hide();
self.btn.prop("disabled", true);
// *** static data
self.release.addMedium({
idx: 0,
fmt: 'Digital Media'
});
self.release.setPackaging('none');
self.release.setNote('Imported from The Internet Archive (' + url + ')');
// *** parsed data from release JSON object
self.parseJSON.urls(self.release, pageJSON);
self.parseJSON.artists(self.release, pageJSON);
self.parseJSON.title(self.release, pageJSON);
self.parseJSON.labels(self.release, pageJSON);
self.parseJSON.release(self.release, pageJSON);
self.parseJSON.annotation(self.release, pageJSON);
self.tracks.commit(self.release);
// submit
//self.release.dump();
self.btn.text("Submitting..");
self.release.submitRelease(function() {
submitted = true;
self.btn.prop("disabled", false);
self.btn.text("Submit again");
});
});
$('.breadcrumbs').before(cEl.append(this.btn));
cEl.after(self.dEl);
self.mbLinkTarget = self.btn;
MBZ.Release.getUrlRelations({
urls: MBZ.Util.expandProtocol(url),
cb: MBZ.Release.insertMBLink,
scope: self
});
};
init.call(this);
};
mbz.archive_org_importer.Release.prototype = {
/**
* Callback function. Called when all sources are parsed.
*/
enableImport: function() {
this.importInitialized = true;
if (this.tracks.validSources > 1) {
this.tracks.showSources.call(this);
this.btn.text("Start import");
this.btn.prop("disabled", false);
} else {
this.btn.click();
}
}
};
/**
* Parse JSON response for a release.
*/
mbz.archive_org_importer.Release.prototype.parseJSON = {
annotation: function (release, data) {
if (data.metadata.notes) {
release.setAnnotation(data.metadata.notes[0]);
}
},
artists: function (release, data) {
if (data.metadata.creator) {
$.each(data.metadata.creator, function (idx, val) {
release.addArtist(val);
});
}
},
labels: function (release, data) {
if (data.metadata.collection) {
$.each(data.metadata.collection, function (idx, val) {
release.addLabel({
name: val,
catNo: data.metadata.identifier[0]
});
});
}
},
release: function (releaseObj, data) {
var dates = data.metadata.date || data.metadata.publicdate;
if (dates) {
$.each(dates, function (idx, val) {
var date = val.match(/([0-9]{4})-([0-9]{2})-([0-9]{2}).*/);
if (date && date.length == 4) {
releaseObj.addRelease({
y: date[1],
m: date[2],
d: date[3],
cc:'XW'
});
}
});
}
},
urls: function (release, data) {
var url = $(location).attr('href');
release.addUrl(url, '75');
release.addUrl(url, '85');
if (data.creativecommons && data.creativecommons.license_url) {
release.addUrl(data.creativecommons.license_url, '301');
}
},
title: function (release, data) {
if (data.metadata.title) {
release.setTitle(data.metadata.title[0]);
}
},
/**
* First parse track list from player JSON data. The provided information
* may not be complete, so gather the parsed data in a local array.
*/
tracksFromPlayer: function(data) {
if (data.length > 0) {
var self = this;
$.each(data, function(idx, val) {
var duration = MBZ.Util.hmsToSeconds(val.duration);
duration = Math.round(parseFloat(duration) * 1000); // sec to msec
if (isNaN(duration)) {
duration = null;
}
// get source file name
var file = val.sources[0].file;
if (file) {
self.tracks.updateData({
med: 0,
tit: val.title.replace(/^[0-9]+\.\s/,''),
idx: idx,
dur: duration,
file: MBZ.Util.getLastPathSegment(file)
});
} else {
console.log("Could not parse file name from player JSON.");
}
});
}
},
tracksFromPage: function(data) {
if (data && data.files) {
var self = this;
$.each(data.files, function(file, val){
if (mbz.archive_org_importer.isAudioFile(val.format)) {
var fileName = file.replace(/^\//, ''); // remove leading slash
var duration = MBZ.Util.hmsToSeconds(val.duration);
duration = Math.round(parseFloat(duration) * 1000); // sec to msec
if (isNaN(duration)) {
duration = null;
}
self.tracks.updateData({
med: 0,
tit: val.title,
dur: duration,
file: fileName
});
}
});
}
}
};
/**
* Handle track sources and the displaying of those data.
*/
mbz.archive_org_importer.Release.Tracks = function() {
/**
* Target element to display track source contents.
*/
var contentHtml = $('<div>');
/**
* Store parsed track data objects to allow multiple data editing passes.
*/
var tracks = {};
/**
* Track data sources available.
*/
var sources = [];
/**
* Track source to use.
*/
var selectedSource = null;
/**
* Number of unique valid sources.
*/
var validSources = 0;
/**
* Add all available track sources to a user dialog.
*/
function addSources(show) {
var sourceSelect = $('<select>');
sourceSelect.on('change', function(){
selectedSource = this.value;
showSources();
});
// add sources
$.each(sources, function(idx, source) {
if (!source.dupe && source.files && source.files.length > 0) {
var sourceTitle = '';
if (source.type == 'player') {
sourceTitle = 'Web Player';
} else {
sourceTitle = 'Playlist (' + source.name + ')';
}
sourceSelect.append('<option value="' + idx + '">'
+ sourceTitle + '</option>');
}
});
// add elements
this.dEl.append(sourceSelect);
sourceSelect.before('Found multiple track listings with different items.'
+ '<br/>Please select a track data source to import: ');
this.dEl.append(contentHtml);
};
/**
* Commit currently selected tracks source to be included in MusicBrainz
* submission.
*/
this.commit = function(release) {
$.each(sources[selectedSource].files, function(idx, val) {
tracks[val].idx = idx; // reset track number
release.addTrack(tracks[val]);
});
};
/**
* Check which track sources are available. Called on page loading.
*/
this.detectSources = function() {
// internal player data
var playerJSON = this.getPlayerJSON();
if (playerJSON.length > 0) {
sources.push({
type: 'player',
name: 'web-player',
data: $.parseJSON(playerJSON)
});
}
// playlists
$('#ff0 a').each(function(idx, item){
var url = $(item).attr('href');
if (url.endsWith('.m3u')) {
sources.push({
type: 'playlist',
name: MBZ.Util.getLastPathSegment(decodeURIComponent(url)),
url: url
});
}
});
if (sources.length > 0) {
// default to first entry
selectedSource = 0;
}
};
/**
* Parse track data from all available sources. Called, when import is
* initialized.
* @pageData page data as JSON object
*/
this.parseSources = function(pageData) {
var self = this;
var sourceParsedCount = 0;
function incParsedCount() {
// increase parsed sources counter
if (++sourceParsedCount == sources.length) {
squashSources.call(self);
if (validSources > 1) {
addSources.call(self);
}
// all data parsed, proceed with import
self.enableImport();
}
}
function getTrackList(source) {
if (source.files && source.files.length > 0) {
// looks like data is already set
return;
}
source.files = [];
if (source.type == 'player') {
$.each(source.data, function(idx, val) {
var file = val.sources[0].file;
if (file) {
source.files.push(MBZ.Util.getLastPathSegment(file));
}
});
// done
incParsedCount();
} else if (source.type == 'playlist') {
// needed, since we get redirected to differet subdomain
var url = 'https://cors-anywhere.herokuapp.com/archive.org:443'
+ source.url;
$.get(url, function(data) {
//source.data = data;
var files = data.split('\n');
$.each(files, function(idx, file) {
file = MBZ.Util.getLastPathSegment(file.trim());
if (file.length > 0) {
source.files.push(file);
}
});
}, 'text').fail(function(jqxhr, textStatus, error) {
var err = textStatus + ', ' + error;
console.error("Request (" + url + ") failed: " + err);
}).always(function() {
// done
incParsedCount();
});
}
}
// First try to parse data from the internal player as a basis. This data
// may be incomplete (cropped track names) so add it first and overwrite it
// later with more complete data from the page's JSON.
$.each(sources, function(idx, val) {
var source = sources[idx];
if (source.type == 'player') {
// parse some track data from the player
self.parseJSON.tracksFromPlayer.call(self, source.data);
}
});
// try to get missing data from page's JSON object
if (pageData.files) {
self.parseJSON.tracksFromPage.call(self, pageData);
}
// since track data is available, pase the track list for each source
$.each(sources, function(idx, val) {
getTrackList(val);
});
};
/**
* Initialize and show the source's track data dialog. Also called, to update
* on track source data select change.
*/
this.showSources = function() {
var self = this;
var trackTable = $('<table id="mbzImportTrackTable">'
+ '<thead>'
+ '<tr>'
+ '<td>#</td><td>Title</td><td>Length</td>'
+ '</tr></thead></table>');
var trackList = $('<tbody>');
$.each(sources[selectedSource].files,
function(idx, val) {
if (tracks[val]) {
var duration = data[val].dur;
duration = (duration ? MBZ.Util.msToHms(duration) : '—');
trackList.append($('<tr>'
+ '<td>' + (idx + 1) + '</td>'
+ '<td>' + tracks[val].tit + '</td>'
+ '<td>' + duration + '</td>'
+ '</tr>'));
} else {
console.warn('No data for file "' + val + '" found.');
}
});
trackTable.append(trackList);
contentHtml.html(trackTable);
this.dEl.show();
};
/**
* Remove duplicated sources which have the same track lists.
*/
function squashSources() {
// go through all source's files
for (var i=0; i<sources.length; i++) {
var src = sources[i];
if (!src.dupe) {
var a = src.files;
if (!a || a.length == 0) {
src.dupe = true;
console.warn("Remove source '" + src.name + "' no files found.");
} else if ((i + 1) < sources.length) {
for (var j=i + 1; j<sources.length; j++) {
var b = sources[j];
if (!b.dupe) {
if (mbz.archive_org_importer.Release.Tracks
.compareSourceFiles(a, b.files)) {
b.dupe = true;
}
}
}
}
}
}
// count valid sources
$.each(sources, function(idx, val) {
if (!val.dupe && val.files.length > 0) {
validSources++;
}
});
};
/**
* Update track metadata with new values. If a value is already set, it will
* get overwritten with the new one.
*/
this.updateData = function(data) {
var isValid = mbz.archive_org_importer.Release.Tracks.isValidTrackData;
if (tracks[data.file]) {
var tData = tracks[data.file];
// update
if (isValid(data.med)) {
tData.med = data.med;
}
if (isValid(data.tit)) {
tData.tit = data.tit.trim();
}
if (isValid(data.idx)) {
tData.idx = data.idx;
}
if (isValid(data.dur)) {
tData.dur = data.dur;
}
} else {
// add new
tracks[data.file] = data;
}
};
};
/**
* Check if some track data is valid (i.e. not empty or undefined).
*/
mbz.archive_org_importer.Release.Tracks.isValidTrackData = function (dataEntry) {
if (typeof dataEntry !== 'undefined' && dataEntry != null) {
if (typeof dataEntry === 'string') {
if (dataEntry.trim().length > 0) {
return true;
}
return false;
} else {
return true;
}
}
return false;
};
/**
* Compare files for two sources.
*/
mbz.archive_org_importer.Release.Tracks.compareSourceFiles = function(a, b) {
if (a.length != b.length) {
return false;
}
for (var i=0; i<a.length; i++) {
if (a[i] != b[i]) {
return false;
}
}
return true;
};
mbz.archive_org_importer.Release.Tracks.prototype = {
/**
* Get player JSON data as string.
* @return player JSON data or empty string, if nothing was found
*/
getPlayerJSON: function() {
var pJSON = $('#midcol > script').text().trim()
.match(/Play\([\s\S]*?(\[{[\s\S]*}\])/);
if (pJSON && pJSON[1]) {
return pJSON[1];
}
return "";
}
};
mbz.archive_org_importer.init = function() {
var pageType = window.location.pathname.split('/');
if (pageType.length >= 2) {
pageType = pageType[1].toLowerCase()
} else {
return;
}
if (pageType == 'details' && $('body').hasClass('Audio')) {
// import a release
MBZ.Html.globStyle.append(
'#mbzImportTrackTable {margin-top:0.5em;margin-left:0.5em;}'
+ '#mbzImportTrackTable thead {'
+ 'font-weight:bold;'
+ 'background-color:rgba(115,108,174,0.5);'
+ '}'
+ '#mbzImportTrackTable tbody td:nth-child(1) {'
+ 'border-right:1px solid #666;'
+ 'padding-right:0.15em;'
+ '}'
+ '#mbzImportTrackTable tbody tr:nth-child(odd) {'
+ 'background-color:rgba(0,0,0,0.1);'
+ '}'
+ '#mbzImportTrackTable tbody td:nth-child(2) {'
+ 'padding-left:0.3em;'
+ '}'
+ '#mbzImportTrackTable tbody td:nth-child(3) {'
+ 'padding-left:0.3em;'
+ 'font-family:courier,monospace;'
+ 'text-align:right;'
+ '}'
);
//mbz.archive_org_importer.release.init();
new mbz.archive_org_importer.Release();
} else if (pageType == 'bookmarks.php') {
// check all bookmarks for MusicBrainz relations
var links = $('.box>table>tbody a').filter(function(idx) {
// no way to check type for audio here
return $(this).attr('href').startsWith('/details/');
});
var control = $('<div id="mbzControls">');
$('.box>h1').after(control);
if (links.length > 0) {
mbz.archive_org_importer.linkCheck.scan({
links: links,
controlEl: control
});
}
} else if (pageType == 'search.php') {
var links = [];
// check audio links for MusicBrainz relations
var audioItems = $('.numberCell>img[alt="[audio]"]').filter(function(idx) {
// get the first linked audio item..
var el = $(this).parent().next().children('a')[0];
if (el) {
el = $(el);
if (el.attr('href').startsWith('/details/')) {
// ..and extract it's url
links.push(el);
}
}
});
var control = $('<div>');
var col = $('<td colspan="2">');
col.append(control);
var row = $('<tr>').append(col);
$('.resultsTable').prepend(row);
if (links.length > 0) {
mbz.archive_org_importer.linkCheck.scan({
links: links,
controlEl: control
});
}
}
};
mbz.archive_org_importer.init();