// ==UserScript==
// @name InstaGrab
// @namespace http://tampermonkey.net/
// @version 0.1.0-alpha
// @description 下载 Instagram 上的图片和视频。
// @author cheer <cheer_cheer@alumni.tongji.edu.cn>
// @license MIT
// @match https://www.instagram.com/*
// @exclude https://www.instagram.com/p/*
// @exclude https://www.instagram.com/reels/*
// @exclude https://www.instagram.com/explore/
// @icon https://static.cdninstagram.com/rsrc.php/v3/yt/r/30PrGfR3xhB.png
// @require https://cdn.bootcdn.net/ajax/libs/jszip/3.9.1/jszip.min.js
// @connect instagram.com
// @connect cdninstagram.com
// @connect fbcdn.net
// @grant GM_registerMenuCommand
// @grant GM_xmlhttpRequest
// @grant unsafeWindow
// ==/UserScript==
(function () {
'use strict'
// Returns true for any callable value.
// `typeof` is used rather than the Object.prototype.toString tag because
// async functions and generators are tagged '[object AsyncFunction]' /
// '[object GeneratorFunction]', which would make async callbacks look
// like "not a function" and be silently skipped by the callers below.
const isFunction = f => typeof f === 'function'
// Returns true when Object.prototype.toString tags `o` as a String
// (covers both string primitives and String wrapper objects).
const isString = (o) => {
  const tag = Object.prototype.toString.call(o)
  return tag === '[object String]'
}
// Left-pads the string form of `n` with '0' until it is `count` characters long.
// Values already at least `count` characters long are returned unchanged.
const zfill = (n, count) => ('' + n).padStart(count, '0')
/**
 * Shortens `s` so that it is at most `maxLength` UTF-16 units long.
 * Strings that already fit are returned untouched; longer ones keep their
 * first `maxLength - 2` units and get a two-character ellipsis appended.
 *
 * @param {string} s - text to shorten
 * @param {number} maxLength - length limit in UTF-16 code units
 * @returns {string} the original or the truncated text
 */
const ellipsize = (s, maxLength) => {
  if (s.length <= maxLength) {
    return s
  }
  const cut = maxLength - 2
  const head = s.substring(0, cut)
  // If the cut falls between the two halves of a surrogate pair, keep the
  // low surrogate as well so the result never ends in half a character.
  const lastUnit = head.charCodeAt(cut - 1)
  const nextUnit = s.charCodeAt(cut)
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff
  const followedByLowSurrogate = nextUnit >= 0xdc00 && nextUnit <= 0xdfff
  const kept = endsOnHighSurrogate && followedByLowSurrogate
    ? s.substring(0, cut + 1)
    : head
  return kept + '……'
}
/**
 * Extracts the file extension (including the leading dot) from the last
 * path segment of `url`. Query string and fragment are ignored because only
 * the parsed pathname is inspected. Relative URLs are resolved against the
 * current page.
 *
 * @param {string} url - absolute or relative URL
 * @returns {string} e.g. '.jpg', or '' when the last segment has no dot
 */
const getExtension = (url) => {
  const { pathname } = new URL(url, location.href)
  // lastIndexOf returns -1 when there is no slash, so +1 keeps the whole path.
  const lastSegment = pathname.substring(pathname.lastIndexOf('/') + 1)
  const dot = lastSegment.lastIndexOf('.')
  return dot < 0 ? '' : lastSegment.substring(dot)
}
/**
 * Replaces every character that is not allowed in a file name with '_'.
 * The forbidden set is: the double quote, angle brackets, pipe, all control
 * characters U+0000..U+001F, colon, asterisk, question mark, backslash and
 * forward slash. The set is built once and captured by the returned function.
 *
 * @param {string} name - proposed file name
 * @returns {string} the name with forbidden characters replaced
 */
const safeFileName = (() => {
  const INVALID_FILE_NAME_CHARS = new Set(('"<>|\0\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\v\f\r\u000e\u000f' +
    '\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f' +
    ':*?\\/').split(''))
  const replaceIfInvalid = ch => (INVALID_FILE_NAME_CHARS.has(ch) ? '_' : ch)
  // Spreading iterates by code point, so astral characters stay intact.
  return (name) => [...name].map(replaceIfInvalid).join('')
})();
/**
 * Generates the archive as a Blob and logs progress to the console.
 * Caller options are passed through to `zip.generateAsync`, except `type`,
 * which is always forced to 'blob'.
 *
 * @param {object} zip - object exposing `generateAsync(options, onUpdate)`
 * @param {object} [options] - extra generation options
 * @returns {Promise<*>} whatever `zip.generateAsync` resolves to
 */
const archive = (zip, options) => {
  const generateOptions = { ...options, type: 'blob' }
  let lastReportAt = -1
  const reportProgress = (metadata) => {
    // Report to the console at most once every 3 seconds.
    const now = Date.now()
    if (now - lastReportAt < 3000) {
      return
    }
    // Only the last path segment of the file currently being packed is shown.
    const currentPath = metadata.currentFile?.trim() || ''
    const file = currentPath.split('/').pop()
    const percent = metadata.percent.toFixed(2).padStart(6)
    if (file) {
      console.debug('归档进度:%c%s%c %% - %c%s%c',
        'color: yellow', percent, 'color: inherit',
        'color: yellow', file, 'color: inherit')
    } else {
      console.debug('归档进度:%c%s%c %%',
        'color: yellow', percent, 'color: inherit')
    }
    lastReportAt = now
  }
  return zip.generateAsync(generateOptions, reportProgress)
}
/**
 * Triggers a browser download of `blob` by clicking a temporary anchor that
 * points at an object URL.
 *
 * @param {Blob} blob - content to save
 * @param {string} [filename] - suggested file name
 */
const downloadBlob = (blob, filename) => {
  const link = unsafeWindow.document.createElement('a')
  const objectUrl = window.URL.createObjectURL(blob)
  link.href = objectUrl
  // NOTE(review): when no filename is given, `true` is assigned, which the
  // DOM presumably stringifies to "true" — confirm that is the intended name.
  link.download = filename || true
  link.click()
  // Revoking right after click() can cancel the download in browsers that
  // read the blob asynchronously, so release the URL after a grace period.
  setTimeout(() => window.URL.revokeObjectURL(objectUrl), 60_000)
}
/**
 * Parses a cookie header string into a plain `{ name: value }` object.
 *
 * Pairs are separated by ';' and are normally followed by a space, so each
 * pair is trimmed before being split; otherwise every cookie but the first
 * would be stored under a key with a leading blank. Only the first '='
 * separates name from value, so values that themselves contain '=' survive.
 * A pair without '=' maps to an empty string. Components with malformed
 * percent-encoding are kept verbatim instead of throwing.
 *
 * @param {string} [str] - cookie string; defaults to `document.cookie`
 * @returns {Object<string, string>} decoded cookie names and values
 */
const parseCookie = str => {
  const safeDecode = text => {
    try {
      return decodeURIComponent(text)
    } catch (e) {
      return text
    }
  }
  const cookies = {}
  for (const part of (str || document.cookie).split(';')) {
    const pair = part.trim()
    if (!pair) {
      continue
    }
    const eq = pair.indexOf('=')
    const name = eq < 0 ? pair : pair.substring(0, eq)
    const value = eq < 0 ? '' : pair.substring(eq + 1)
    cookies[safeDecode(name.trim())] = safeDecode(value.trim())
  }
  return cookies
}
/**
 * Serializes a flat object into a URL query string (without the leading '?').
 * Array values produce one `key=item` pair per element; every other value is
 * converted to a string and produces a single pair.
 *
 * @param {object} [obj] - parameters to serialize
 * @returns {string} encoded query string, '' when `obj` is falsy
 */
const objToQueryString = obj => {
  if (!obj) {
    return ''
  }
  const query = new URLSearchParams()
  Object.entries(obj).forEach(([name, value]) => {
    const isList = Object.prototype.toString.call(value) === '[object Array]'
    const entries = isList ? value : [value + '']
    for (const entry of entries) {
      query.append(name, entry)
    }
  })
  return query.toString()
}
// Fixed identifiers exposed through the getters of the Instagram class below.
// `asbdId` and `appId` are sent with every API call as the `x-asbd-id` and
// `x-ig-app-id` request headers; `queryHash` is only exposed via a getter here.
// NOTE(review): these look like values copied from the Instagram web client —
// confirm they are still current.
const Constants = {
queryHash: 'd4d88dc1500312af6f937f7b804c68c3',
asbdId: '198387',
appId: '936619743392459'
}
/**
 * Reads the profile user name from the current page path.
 * Only single-segment paths such as `/name` or `/name/` are treated as
 * profile pages; `/explore/` is excluded explicitly.
 *
 * Periods are accepted because Instagram user names may contain them
 * (e.g. `/john.doe/`); without '.' in the character class such profiles
 * would resolve to an empty name.
 *
 * @returns {string} the user name, or '' when the page is not a profile
 */
const getUsername = () => {
  const match = location.pathname.match(/^\/([a-z0-9._-]+?)\/?$/i)
  if (!match) {
    return ''
  }
  const group = match[1]
  if (group === 'explore') {
    return ''
  }
  return decodeURI(group || '')
}
/**
 * Converts one page of a user-feed response into a list of
 * `{ caption, media, takenAt }` records.
 *
 * Each media node (the post itself, or every entry of `carousel_media`) is
 * described as a video when it carries a non-empty `video_versions` list and
 * as an image otherwise. Checking this per node means videos inside a
 * carousel are downloaded as videos rather than as their cover image, and a
 * post with an empty `video_versions` list falls back to its image.
 *
 * @param {object} posts - feed response; `posts.items` is the post list
 * @returns {Array<{caption: string, media: object[], takenAt: (Date|undefined)}>}
 */
const getPostItems = posts => {
  // Builds the download descriptor for a single media node, or returns a
  // falsy value when no usable URL can be found for it.
  const describeMedia = (node, code) => {
    const video = node.video_versions?.[0]
    if (video) {
      return {
        // Fall back to the node id when the video version has none.
        id: video.id ?? node.id,
        code,
        video: true,
        width: video.width,
        height: video.height,
        url: video.url
      }
    }
    const info = getMediaInfo(node)
    if (info) {
      info.code = code
    }
    return info
  }
  const items = []
  for (const item of posts.items || []) {
    const caption = item.caption?.text || ''
    // A post that is itself a video is a single node; otherwise use the
    // carousel entries when present, else the post as a single image node.
    const nodes = item.video_versions?.length
      ? [item]
      : (item.carousel_media || [item])
    const media = []
    for (const node of nodes) {
      const described = describeMedia(node, item.code)
      if (described) {
        media.push(described)
      }
    }
    // `taken_at` is multiplied by 1000 to build a Date (seconds -> ms).
    const takenAt = item.taken_at ? new Date(item.taken_at * 1000) : undefined
    items.push({
      caption,
      media,
      takenAt
    })
  }
  return items
}
/**
 * Picks the image URL to download for a media node.
 * The candidate whose dimensions equal the node's original width/height is
 * preferred; otherwise the first candidate is used.
 *
 * @param {object} m - media node with `id`, `original_width`,
 *   `original_height` and optionally `image_versions2.candidates`
 * @returns {{id: *, video: boolean, width: *, height: *, url: string}|null}
 *   the image descriptor, or null when the node has no usable candidate URL
 */
const getMediaInfo = m => {
  const originalWidth = m.original_width
  const originalHeight = m.original_height
  const candidates = m.image_versions2?.candidates || []
  const exact = candidates.find(
    c => c.width === originalWidth && c.height === originalHeight
  )
  // `?.` on the first candidate: with an empty candidate list the lookup
  // must yield undefined (-> null below) instead of throwing a TypeError.
  const url = exact?.url || candidates[0]?.url
  if (!url) {
    return null
  }
  return {
    id: m.id,
    video: false,
    width: originalWidth,
    height: originalHeight,
    url
  }
}
/**
 * Minimal client for the user-feed endpoint on i.instagram.com.
 * The user name is taken from the current page when the instance is created;
 * `userId` is filled in by the caller once the first response reveals it.
 * Request credentials (CSRF token, www-claim, rollout hash) are read from the
 * page lazily and cached on first access.
 */
class Instagram {
  constructor() {
    this._username = getUsername()
  }
  /** User name of the profile shown on the current page ('' if none). */
  get username() {
    return this._username
  }
  /** Numeric user id; undefined until assigned by the caller. */
  get userId() {
    return this._userId
  }
  set userId(value) {
    this._userId = value
  }
  get queryHash() {
    return Constants.queryHash
  }
  get asbdId() {
    return Constants.asbdId
  }
  get appId() {
    return Constants.appId
  }
  /** Value of the `csrftoken` cookie, read once and cached. */
  get csrfToken() {
    if (this._csrfToken === undefined) {
      this._csrfToken = parseCookie().csrftoken
    }
    return this._csrfToken
  }
  /** `www-claim-v2` from sessionStorage, or '0' when absent; cached. */
  get wwwClaim() {
    if (this._wwwClaim === undefined) {
      this._wwwClaim = sessionStorage.getItem('www-claim-v2') || '0'
    }
    return this._wwwClaim
  }
  /**
   * The `data-btmanifest` attribute of the element whose value ends in
   * `_main`, with that 5-character suffix removed; '' when no such element
   * exists. Cached after the first lookup.
   */
  get rolloutHash() {
    if (this._rolloutHash === undefined) {
      this._rolloutHash = (() => {
        const el = document.querySelector('[data-btmanifest$=_main]')
        if (!el) {
          return ''
        }
        const value = el.getAttribute('data-btmanifest')
        return value.substring(0, value.length - 5)
      })()
    }
    return this._rolloutHash
  }
  /**
   * Performs one API call through GM_xmlhttpRequest.
   *
   * @param {object} request
   * @param {string} [request.method] - HTTP method, 'GET' by default
   * @param {string} request.url - endpoint URL without query string
   * @param {object} [request.params] - query parameters
   * @param {*} [request.data] - body; non-strings are JSON-encoded
   * @param {object} [request.headers] - extra headers, override the defaults
   * @returns {Promise<object>} the parsed response when its `status` is 'ok'
   * @throws rejects with ApiStatusError (status not 'ok'),
   *   UnexpectedApiResponseError (response is not a plain object) or
   *   UnhandledApiError (request failed)
   */
  _executeApi({ method, url, params, data, headers }) {
    const qs = objToQueryString(params)
    const requestUrl = qs ? url + '?' + qs : url
    const getOptions = (resolve, reject) => {
      const options = {
        method: method || 'GET',
        url: requestUrl,
        headers: Object.assign({
          'origin': location.origin,
          // The DOM property is spelled `referrer` (double r); the misspelt
          // `document.referer` is always undefined.
          'referer': document.referrer || 'https://www.instagram.com/',
          'x-asbd-id': this.asbdId,
          'x-csrftoken': this.csrfToken,
          'x-ig-app-id': this.appId,
          'x-ig-www-claim': this.wwwClaim,
          'x-instagram-ajax': this.rolloutHash,
          'x-requested-with': 'XMLHttpRequest'
        }, headers || {}),
        responseType: 'json',
        onload(r) {
          const resp = r.response
          if (Object.prototype.toString.call(resp) === '[object Object]') {
            if (resp.status === 'ok') {
              resolve(resp)
              return
            }
            const err = new Error('接口返回的状态不正确。')
            err.name = 'ApiStatusError'
            err.response = r
            reject(err)
            return
          }
          const err = new Error('接口返回的内容无法解析。')
          err.name = 'UnexpectedApiResponseError'
          err.response = r
          reject(err)
        },
        onerror(r) {
          console.error(r)
          const err = new Error('接口调用失败。')
          err.name = 'UnhandledApiError'
          // Keep the failure details, like the other error paths do.
          err.response = r
          reject(err)
        }
      }
      if (data !== undefined) {
        options.data = isString(data) ? data : JSON.stringify(data)
      }
      return options
    }
    return new Promise((resolve, reject) => GM_xmlhttpRequest(getOptions(resolve, reject)))
  }
  /**
   * Fetches one page (12 posts) of the user's feed.
   * The feed is addressed by user id once it is known, by user name before.
   *
   * @param {string} [maxId] - pagination cursor from the previous page
   * @returns {Promise<object>} the feed response
   */
  getPosts(maxId) {
    const params = {
      count: 12
    }
    if (maxId) {
      params.max_id = maxId
    }
    // encodeURIComponent (not encodeURI) so that '/', '?' or '#' in the value
    // cannot alter the structure of the request path.
    let url
    if (this.userId) {
      url = `https://i.instagram.com/api/v1/feed/user/${encodeURIComponent(this.userId)}/`
    } else {
      url = `https://i.instagram.com/api/v1/feed/user/${encodeURIComponent(this.username)}/username/`
    }
    return this._executeApi({
      url,
      params
    })
  }
}
/**
 * Downloads one media resource as a Blob via GM_xmlhttpRequest.
 *
 * @param {{url: string, video: boolean}} media - resource descriptor
 * @param {object} item - the post the resource belongs to (passed through)
 * @returns {Promise<{media: object, item: object, content: *}>} resolves
 *   with the response body for any 2xx status
 * @throws rejects with HttpStatusError (status outside 200-299),
 *   TimeoutError (no answer within 45 s) or HttpError (request failed)
 */
const downloadMedia = (media, item) => {
  return new Promise((resolve, reject) => {
    const accept = media.video
      ? '*/*'
      : 'image/jpg,image/apng,image/svg+xml,image/*,*/*;q=0.8'
    GM_xmlhttpRequest({
      method: 'GET',
      url: media.url,
      responseType: 'blob',
      timeout: 45_000,
      headers: {
        accept
      },
      onload(r) {
        // Accept exactly the 2xx range. The previous `200 < r.status` test
        // rejected every success code other than 200 and let codes below
        // 200 through.
        if (r.status < 200 || r.status > 299) {
          const err = new Error(`媒体资源下载失败,服务器返回 HTTP ${r.status}。`)
          err.name = 'HttpStatusError'
          err.status = r.status
          err.response = r
          reject(err)
          return
        }
        resolve({
          media,
          item,
          content: r.response
        })
      },
      ontimeout() {
        const err = new Error('媒体资源下载超时。')
        err.name = 'TimeoutError'
        reject(err)
      },
      onerror(r) {
        const err = new Error('媒体资源下载失败。')
        err.name = 'HttpError'
        if (r) {
          err.response = r
          if (r.error != null) {
            err.error = r.error
          }
        }
        reject(err)
      }
    })
  })
}
/**
 * Walks through all feed pages of the current profile and downloads every
 * media resource found. Downloads are started as soon as a page is parsed
 * and run while the following pages are still being fetched; the function
 * returns once all pages are read and every download has settled.
 *
 * All callbacks are optional and may be async.
 *
 * @param {object} [callbacks]
 * @param {function(object)} [callbacks.onUserResolved] - called once with
 *   `posts.user` of the first page that is fetched while no user id is known
 * @param {function(object, object[])} [callbacks.onPostsFetched] - called
 *   per page with the raw response and the parsed items
 * @param {function(object)} [callbacks.onMediaDownloaded] - called per
 *   finished download with `{ media, item, content, seq }`
 * @param {function(Error)} [callbacks.onMediaDownloadFailed] - called per
 *   failed download (or failed onMediaDownloaded) with the error
 * @returns {Promise<void>}
 */
const download = async ({
  onUserResolved,
  onPostsFetched,
  onMediaDownloaded,
  onMediaDownloadFailed,
} = {}) => {
  const ins = new Instagram()
  let seq = 0
  const promises = []
  let nextMaxId = null
  while (true) {
    const posts = await ins.getPosts(nextMaxId)
    nextMaxId = posts.next_max_id
    if (!ins.userId) {
      // The first response reveals the numeric user id; later pages are
      // requested by id instead of by user name.
      ins.userId = posts.user?.pk
      if (isFunction(onUserResolved)) {
        await onUserResolved(posts.user)
      }
    }
    const items = getPostItems(posts)
    for (const item of items) {
      for (const m of item.media) {
        seq++
        // Capture the sequence number now; `seq` keeps changing before the
        // download finishes.
        const seqNo = seq
        const promise = downloadMedia(m, item)
          .then(async r => {
            r.seq = seqNo
            if (isFunction(onMediaDownloaded)) {
              await onMediaDownloaded(r)
            }
            return r
          })
          .catch(async e => {
            if (isFunction(onMediaDownloadFailed)) {
              await onMediaDownloadFailed(e)
            }
            throw e
          })
        // The rejection is only consumed by allSettled after the last page.
        // Attach a no-op handler so a download that fails while paging is
        // still in progress is not reported as an unhandled rejection.
        promise.catch(() => {})
        promises.push(promise)
      }
    }
    if (isFunction(onPostsFetched)) {
      await onPostsFetched(posts, items)
    }
    if (!posts.more_available || !posts.next_max_id) {
      console.log('下载完成啦。')
      break
    }
  }
  const results = await Promise.allSettled(promises)
  console.log('Results:', results)
}
/**
 * Menu command handler: downloads every post of the current profile, packs
 * the files into a zip (one folder named after the user) and offers the
 * archive for download. Any error aborts the run with an alert; individual
 * media failures are logged and counted but do not abort.
 *
 * @returns {Promise<void>}
 */
const downloadAsZip = async () => {
  try {
    const zip = new JSZip()
    let user
    let userdir
    // Number of media downloads that failed. The previous code incremented
    // an undeclared `i` here, which throws a ReferenceError in strict mode
    // before the failure could even be logged.
    let failedCount = 0
    await download({
      onUserResolved(u) {
        user = u
        // Create the user's directory inside the zip file.
        userdir = zip.folder(safeFileName(u.username))
      },
      onPostsFetched(posts, items) {
        const mediaCount = items.map(x => x.media.length).reduce((a, b) => a + b, 0)
        console.log(`本次采集到 ${items.length} 个帖子,${mediaCount} 个图片/视频资源。`)
      },
      onMediaDownloaded(e) {
        // The id is only a fallback for posts without caption; coerce it to
        // a string because ellipsize works on strings only.
        const caption = e.item.caption || String(e.media.id)
        const ext = getExtension(e.media.url)
        const fileName = `${zfill(e.seq, 4)} - ${ellipsize(caption, 36)}${ext}`
        userdir.file(safeFileName(fileName), e.content, {
          date: e.item.takenAt || new Date(),
          comment: `${e.item.caption} https://www.instagram.com/p/${e.media.code}/`.trim()
        })
      },
      onMediaDownloadFailed(e) {
        failedCount++
        console.error('下载失败啦', e)
      }
    })
    if (failedCount > 0) {
      console.warn(`有 ${failedCount} 个资源下载失败。`)
    }
    console.log('下载完成,正在归档文件。')
    const blob = await archive(zip, {
      comment: `Instagram: https://www.instagram.com/${encodeURIComponent(user.username)}/`
    })
    console.log('归档完成,准备下载。')
    downloadBlob(blob, safeFileName(user.full_name || user.username) + '.zip')
  } catch (e) {
    alert('下载失败。')
    console.error('下载失败。', e)
  }
}
// Register the userscript menu entry; selecting it runs downloadAsZip.
GM_registerMenuCommand('下载 TA 的帖子', downloadAsZip)
})()