Chapter Fetch Improvements (#754)

* Chapter fetch improvements

* Update previous date uploads

* Lint

* Fix backup inserts

* Remove extra maxSeenUploadDate

* Port downloaded over

* Make sure to set isDownloaded on all inserts
This commit is contained in:
Mitchell Syer
2023-11-04 18:09:40 -04:00
committed by GitHub
parent 21e325af9c
commit 0785f4d0f5
6 changed files with 261 additions and 134 deletions

View File

@@ -0,0 +1,45 @@
package eu.kanade.tachiyomi.util.chapter
object ChapterSanitizer {
fun String.sanitize(title: String): String {
return trim()
.removePrefix(title)
.trim(*CHAPTER_TRIM_CHARS)
}
private val CHAPTER_TRIM_CHARS =
arrayOf(
// Whitespace
' ',
'\u0009',
'\u000A',
'\u000B',
'\u000C',
'\u000D',
'\u0020',
'\u0085',
'\u00A0',
'\u1680',
'\u2000',
'\u2001',
'\u2002',
'\u2003',
'\u2004',
'\u2005',
'\u2006',
'\u2007',
'\u2008',
'\u2009',
'\u200A',
'\u2028',
'\u2029',
'\u202F',
'\u205F',
'\u3000',
// Separators
'-',
'_',
',',
':',
).toCharArray()
}

View File

@@ -7,17 +7,21 @@ package suwayomi.tachidesk.manga.impl
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import com.google.common.cache.Cache
import com.google.common.cache.CacheBuilder
import eu.kanade.tachiyomi.source.model.SChapter import eu.kanade.tachiyomi.source.model.SChapter
import eu.kanade.tachiyomi.source.model.SManga import eu.kanade.tachiyomi.source.model.SManga
import eu.kanade.tachiyomi.source.online.HttpSource import eu.kanade.tachiyomi.source.online.HttpSource
import eu.kanade.tachiyomi.util.chapter.ChapterRecognition import eu.kanade.tachiyomi.util.chapter.ChapterRecognition
import eu.kanade.tachiyomi.util.chapter.ChapterSanitizer.sanitize
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.serialization.Serializable import kotlinx.serialization.Serializable
import mu.KotlinLogging import mu.KotlinLogging
import org.jetbrains.exposed.dao.id.EntityID import org.jetbrains.exposed.dao.id.EntityID
import org.jetbrains.exposed.sql.Op import org.jetbrains.exposed.sql.Op
import org.jetbrains.exposed.sql.ResultRow import org.jetbrains.exposed.sql.ResultRow
import org.jetbrains.exposed.sql.SortOrder import org.jetbrains.exposed.sql.SortOrder
import org.jetbrains.exposed.sql.SortOrder.ASC
import org.jetbrains.exposed.sql.SqlExpressionBuilder.inList import org.jetbrains.exposed.sql.SqlExpressionBuilder.inList
import org.jetbrains.exposed.sql.and import org.jetbrains.exposed.sql.and
import org.jetbrains.exposed.sql.batchInsert import org.jetbrains.exposed.sql.batchInsert
@@ -41,7 +45,11 @@ import suwayomi.tachidesk.manga.model.table.MangaTable
import suwayomi.tachidesk.manga.model.table.PageTable import suwayomi.tachidesk.manga.model.table.PageTable
import suwayomi.tachidesk.manga.model.table.toDataClass import suwayomi.tachidesk.manga.model.table.toDataClass
import suwayomi.tachidesk.server.serverConfig import suwayomi.tachidesk.server.serverConfig
import java.lang.Long
import java.time.Instant import java.time.Instant
import java.util.TreeSet
import java.util.concurrent.TimeUnit
import kotlin.math.max
object Chapter { object Chapter {
private val logger = KotlinLogging.logger { } private val logger = KotlinLogging.logger { }
@@ -109,133 +117,179 @@ object Chapter {
} }
} }
val map: Cache<Int, Mutex> =
CacheBuilder.newBuilder()
.expireAfterAccess(10, TimeUnit.MINUTES)
.build()
suspend fun fetchChapterList(mangaId: Int): List<SChapter> { suspend fun fetchChapterList(mangaId: Int): List<SChapter> {
val manga = getManga(mangaId) val mutex = map.get(mangaId) { Mutex() }
val source = getCatalogueSourceOrStub(manga.sourceId.toLong()) val chapterList =
mutex.withLock {
val manga = getManga(mangaId)
val source = getCatalogueSourceOrStub(manga.sourceId.toLong())
val sManga = val sManga =
SManga.create().apply { SManga.create().apply {
title = manga.title title = manga.title
url = manga.url url = manga.url
}
val numberOfCurrentChapters = getCountOfMangaChapters(mangaId)
val chapterList = source.getChapterList(sManga)
// Recognize number for new chapters.
chapterList.forEach { chapter ->
(source as? HttpSource)?.prepareNewChapter(chapter, sManga)
val chapterNumber = ChapterRecognition.parseChapterNumber(manga.title, chapter.name, chapter.chapter_number.toDouble())
chapter.chapter_number = chapterNumber.toFloat()
}
val now = Instant.now().epochSecond
val chaptersInDb =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.map { ChapterTable.toDataClass(it) }
.toSet()
}
val chaptersToInsert = mutableListOf<ChapterDataClass>()
val chaptersToUpdate = mutableListOf<ChapterDataClass>()
chapterList.reversed().forEachIndexed { index, fetchedChapter ->
val chapterEntry = chaptersInDb.find { it.url == fetchedChapter.url }
val chapterData =
ChapterDataClass.fromSChapter(
fetchedChapter,
chapterEntry?.id ?: 0,
index + 1,
now,
mangaId,
runCatching {
(source as? HttpSource)?.getChapterUrl(fetchedChapter)
}.getOrNull(),
)
if (chapterEntry == null) {
chaptersToInsert.add(chapterData)
} else {
chaptersToUpdate.add(chapterData)
}
}
transaction {
if (chaptersToInsert.isNotEmpty()) {
ChapterTable.batchInsert(chaptersToInsert) {
this[ChapterTable.url] = it.url
this[ChapterTable.name] = it.name
this[ChapterTable.date_upload] = it.uploadDate
this[ChapterTable.chapter_number] = it.chapterNumber
this[ChapterTable.scanlator] = it.scanlator
this[ChapterTable.sourceOrder] = it.index
this[ChapterTable.fetchedAt] = it.fetchedAt
this[ChapterTable.manga] = it.mangaId
this[ChapterTable.realUrl] = it.realUrl
}
}
if (chaptersToUpdate.isNotEmpty()) {
BatchUpdateStatement(ChapterTable).apply {
chaptersToUpdate.forEach {
addBatch(EntityID(it.id, ChapterTable))
this[ChapterTable.name] = it.name
this[ChapterTable.date_upload] = it.uploadDate
this[ChapterTable.chapter_number] = it.chapterNumber
this[ChapterTable.scanlator] = it.scanlator
this[ChapterTable.sourceOrder] = it.index
this[ChapterTable.realUrl] = it.realUrl
} }
execute(this@transaction)
}
}
MangaTable.update({ MangaTable.id eq mangaId }) { val numberOfCurrentChapters = getCountOfMangaChapters(mangaId)
it[MangaTable.chaptersLastFetchedAt] = Instant.now().epochSecond val chapterList = source.getChapterList(sManga)
}
}
val newChapters = // Recognize number for new chapters.
transaction { chapterList.forEach { chapter ->
ChapterTable.select { ChapterTable.manga eq mangaId } (source as? HttpSource)?.prepareNewChapter(chapter, sManga)
.orderBy(ChapterTable.sourceOrder to SortOrder.DESC).toList() val chapterNumber = ChapterRecognition.parseChapterNumber(manga.title, chapter.name, chapter.chapter_number.toDouble())
} chapter.chapter_number = chapterNumber.toFloat()
chapter.name = chapter.name.sanitize(manga.title)
// clear any orphaned/duplicate chapters that are in the db but not in `chapterList` chapter.scanlator = chapter.scanlator?.ifBlank { null }
val dbChapterCount = newChapters.count()
if (dbChapterCount > chapterList.size) { // we got some clean up due
val dbChapterList =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.orderBy(ChapterTable.url to ASC).toList()
} }
val chapterUrls = chapterList.map { it.url }.toSet() val now = Instant.now().epochSecond
// Used to not set upload date of older chapters
// to a higher value than newer chapters
var maxSeenUploadDate = 0L
val chaptersIdsToDelete = val chaptersInDb =
dbChapterList.mapIndexedNotNull { index, dbChapter -> transaction {
val isOrphaned = !chapterUrls.contains(dbChapter[ChapterTable.url]) ChapterTable.select { ChapterTable.manga eq mangaId }
val isDuplicate = .map { ChapterTable.toDataClass(it) }
index < dbChapterList.lastIndex && dbChapter[ChapterTable.url] == dbChapterList[index + 1][ChapterTable.url] .toList()
val deleteChapter = isOrphaned || isDuplicate }
if (deleteChapter) {
dbChapter[ChapterTable.id].value val chaptersToInsert = mutableListOf<ChapterDataClass>()
val chaptersToUpdate = mutableListOf<ChapterDataClass>()
chapterList.reversed().forEachIndexed { index, fetchedChapter ->
val chapterEntry = chaptersInDb.find { it.url == fetchedChapter.url }
val chapterData =
ChapterDataClass.fromSChapter(
fetchedChapter,
chapterEntry?.id ?: 0,
index + 1,
now,
mangaId,
runCatching {
(source as? HttpSource)?.getChapterUrl(fetchedChapter)
}.getOrNull(),
)
if (chapterEntry == null) {
val newChapterData =
if (chapterData.uploadDate == 0L) {
val altDateUpload = if (maxSeenUploadDate == 0L) now else maxSeenUploadDate
chapterData.copy(uploadDate = altDateUpload)
} else {
maxSeenUploadDate = max(maxSeenUploadDate, chapterData.uploadDate)
chapterData
}
chaptersToInsert.add(newChapterData)
} else { } else {
null val newChapterData =
if (chapterData.uploadDate == 0L) {
chapterData.copy(uploadDate = chapterEntry.uploadDate)
} else {
chapterData
}
chaptersToUpdate.add(newChapterData)
} }
} }
transaction { val deletedChapterNumbers = TreeSet<Float>()
PageTable.deleteWhere { PageTable.chapter inList chaptersIdsToDelete } val deletedReadChapterNumbers = TreeSet<Float>()
ChapterTable.deleteWhere { ChapterTable.id inList chaptersIdsToDelete } val deletedBookmarkedChapterNumbers = TreeSet<Float>()
} val deletedDownloadedChapterNumbers = TreeSet<Float>()
} val deletedChapterNumberDateFetchMap = mutableMapOf<Float, kotlin.Long>()
if (manga.inLibrary) { // clear any orphaned/duplicate chapters that are in the db but not in `chapterList`
downloadNewChapters(mangaId, numberOfCurrentChapters, newChapters) val dbChapterCount = chaptersInDb.count()
} if (dbChapterCount > chapterList.size) { // we got some clean up due
val chapterUrls = chapterList.map { it.url }.toSet()
val chaptersIdsToDelete =
chaptersInDb.mapNotNull { dbChapter ->
if (!chapterUrls.contains(dbChapter.url)) {
if (dbChapter.read) deletedReadChapterNumbers.add(dbChapter.chapterNumber)
if (dbChapter.bookmarked) deletedBookmarkedChapterNumbers.add(dbChapter.chapterNumber)
if (dbChapter.downloaded) deletedDownloadedChapterNumbers.add(dbChapter.chapterNumber)
deletedChapterNumbers.add(dbChapter.chapterNumber)
deletedChapterNumberDateFetchMap[dbChapter.chapterNumber] = dbChapter.fetchedAt
dbChapter.id
} else {
null
}
}
transaction {
PageTable.deleteWhere { PageTable.chapter inList chaptersIdsToDelete }
ChapterTable.deleteWhere { ChapterTable.id inList chaptersIdsToDelete }
}
}
transaction {
if (chaptersToInsert.isNotEmpty()) {
ChapterTable.batchInsert(chaptersToInsert) { chapter ->
this[ChapterTable.url] = chapter.url
this[ChapterTable.name] = chapter.name
this[ChapterTable.date_upload] = chapter.uploadDate
this[ChapterTable.chapter_number] = chapter.chapterNumber
this[ChapterTable.scanlator] = chapter.scanlator
this[ChapterTable.sourceOrder] = chapter.index
this[ChapterTable.fetchedAt] = chapter.fetchedAt
this[ChapterTable.manga] = chapter.mangaId
this[ChapterTable.realUrl] = chapter.realUrl
this[ChapterTable.isRead] = false
this[ChapterTable.isBookmarked] = false
this[ChapterTable.isDownloaded] = false
// is recognized chapter number
if (chapter.chapterNumber >= 0f && chapter.chapterNumber in deletedChapterNumbers) {
this[ChapterTable.isRead] = chapter.chapterNumber in deletedReadChapterNumbers
this[ChapterTable.isBookmarked] = chapter.chapterNumber in deletedBookmarkedChapterNumbers
this[ChapterTable.isDownloaded] = chapter.chapterNumber in deletedDownloadedChapterNumbers
// Try to use the fetch date of the original entry to not pollute 'Updates' tab
deletedChapterNumberDateFetchMap[chapter.chapterNumber]?.let {
this[ChapterTable.fetchedAt] = it
}
}
}
}
if (chaptersToUpdate.isNotEmpty()) {
BatchUpdateStatement(ChapterTable).apply {
chaptersToUpdate.forEach {
addBatch(EntityID(it.id, ChapterTable))
this[ChapterTable.name] = it.name
this[ChapterTable.date_upload] = it.uploadDate
this[ChapterTable.chapter_number] = it.chapterNumber
this[ChapterTable.scanlator] = it.scanlator
this[ChapterTable.sourceOrder] = it.index
this[ChapterTable.realUrl] = it.realUrl
}
execute(this@transaction)
}
}
MangaTable.update({ MangaTable.id eq mangaId }) {
it[MangaTable.chaptersLastFetchedAt] = Instant.now().epochSecond
}
}
val newChapters =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.orderBy(ChapterTable.sourceOrder to SortOrder.DESC).toList()
}
if (manga.inLibrary) {
downloadNewChapters(mangaId, numberOfCurrentChapters, newChapters)
}
chapterList
}
return chapterList return chapterList
} }

View File

@@ -22,6 +22,7 @@ import okio.buffer
import okio.gzip import okio.gzip
import okio.source import okio.source
import org.jetbrains.exposed.sql.and import org.jetbrains.exposed.sql.and
import org.jetbrains.exposed.sql.batchInsert
import org.jetbrains.exposed.sql.insert import org.jetbrains.exposed.sql.insert
import org.jetbrains.exposed.sql.insertAndGetId import org.jetbrains.exposed.sql.insertAndGetId
import org.jetbrains.exposed.sql.select import org.jetbrains.exposed.sql.select
@@ -294,23 +295,25 @@ object ProtoBackupImport : ProtoBackupBase() {
// insert chapter data // insert chapter data
val chaptersLength = chapters.size val chaptersLength = chapters.size
chapters.forEach { chapter -> ChapterTable.batchInsert(chapters) { chapter ->
ChapterTable.insert { this[ChapterTable.url] = chapter.url
it[url] = chapter.url this[ChapterTable.name] = chapter.name
it[name] = chapter.name if (chapter.date_upload == 0L) {
it[date_upload] = chapter.date_upload this[ChapterTable.date_upload] = chapter.date_fetch
it[chapter_number] = chapter.chapter_number } else {
it[scanlator] = chapter.scanlator this[ChapterTable.date_upload] = chapter.date_upload
it[sourceOrder] = chaptersLength - chapter.source_order
it[ChapterTable.manga] = mangaId
it[isRead] = chapter.read
it[lastPageRead] = chapter.last_page_read
it[isBookmarked] = chapter.bookmark
it[fetchedAt] = TimeUnit.MILLISECONDS.toSeconds(chapter.date_fetch)
} }
this[ChapterTable.chapter_number] = chapter.chapter_number
this[ChapterTable.scanlator] = chapter.scanlator
this[ChapterTable.sourceOrder] = chaptersLength - chapter.source_order
this[ChapterTable.manga] = mangaId
this[ChapterTable.isRead] = chapter.read
this[ChapterTable.lastPageRead] = chapter.last_page_read
this[ChapterTable.isBookmarked] = chapter.bookmark
this[ChapterTable.fetchedAt] = TimeUnit.MILLISECONDS.toSeconds(chapter.date_fetch)
} }
// insert categories // insert categories
@@ -350,7 +353,11 @@ object ProtoBackupImport : ProtoBackupBase() {
ChapterTable.insert { ChapterTable.insert {
it[url] = chapter.url it[url] = chapter.url
it[name] = chapter.name it[name] = chapter.name
it[date_upload] = chapter.date_upload if (chapter.date_upload == 0L) {
it[date_upload] = chapter.date_fetch
} else {
it[date_upload] = chapter.date_upload
}
it[chapter_number] = chapter.chapter_number it[chapter_number] = chapter.chapter_number
it[scanlator] = chapter.scanlator it[scanlator] = chapter.scanlator

View File

@@ -15,7 +15,7 @@ import java.io.InputStream
/* /*
* Base class for downloaded chapter files provider, example: Folder, Archive * Base class for downloaded chapter files provider, example: Folder, Archive
* */ */
abstract class ChaptersFilesProvider(val mangaId: Int, val chapterId: Int) : DownloadedFilesProvider { abstract class ChaptersFilesProvider(val mangaId: Int, val chapterId: Int) : DownloadedFilesProvider {
abstract fun getImageImpl(index: Int): Pair<InputStream, String> abstract fun getImageImpl(index: Int): Pair<InputStream, String>

View File

@@ -56,7 +56,7 @@ data class ChapterDataClass(
name = sChapter.name, name = sChapter.name,
uploadDate = sChapter.date_upload, uploadDate = sChapter.date_upload,
chapterNumber = sChapter.chapter_number, chapterNumber = sChapter.chapter_number,
scanlator = sChapter.scanlator ?: "", scanlator = sChapter.scanlator,
index = index, index = index,
fetchedAt = fetchedAt, fetchedAt = fetchedAt,
realUrl = realUrl, realUrl = realUrl,

View File

@@ -0,0 +1,21 @@
package suwayomi.tachidesk.server.database.migration
/*
* Copyright (C) Contributors to the Suwayomi project
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import de.neonew.exposed.migrations.helpers.SQLMigration
@Suppress("ClassName", "unused")
class M0030_FixDateUpload : SQLMigration() {
// language=h2
override val sql =
"""
UPDATE CHAPTER
SET DATE_UPLOAD = (FETCHED_AT * 1000)
WHERE DATE_UPLOAD = 0;
""".trimIndent()
}