Chapter Fetch Improvements (#754)

* Chapter fetch improvements

* Update previous date uploads

* Lint

* Fix backup inserts

* Remove extra maxSeenUploadDate

* Port downloaded over

* Make sure to set isDownloaded on all inserts
This commit is contained in:
Mitchell Syer
2023-11-04 18:09:40 -04:00
committed by GitHub
parent 21e325af9c
commit 0785f4d0f5
6 changed files with 261 additions and 134 deletions

View File

@@ -0,0 +1,45 @@
package eu.kanade.tachiyomi.util.chapter
/**
 * Helpers for cleaning up chapter names.
 */
object ChapterSanitizer {
    /**
     * Characters stripped from both ends of a chapter name by [sanitize]:
     * whitespace code points followed by common separator characters.
     *
     * Built with `charArrayOf` so the characters are stored as a primitive array directly.
     */
    private val CHAPTER_TRIM_CHARS =
        charArrayOf(
            // Whitespace
            '\u0009',
            '\u000A',
            '\u000B',
            '\u000C',
            '\u000D',
            '\u0020',
            '\u0085',
            '\u00A0',
            '\u1680',
            '\u2000',
            '\u2001',
            '\u2002',
            '\u2003',
            '\u2004',
            '\u2005',
            '\u2006',
            '\u2007',
            '\u2008',
            '\u2009',
            '\u200A',
            '\u2028',
            '\u2029',
            '\u202F',
            '\u205F',
            '\u3000',
            // Separators
            '-',
            '_',
            ',',
            ':',
        )

    /**
     * Returns the receiver with a leading [title] and surrounding clutter removed.
     *
     * Steps, in order:
     * 1. trim surrounding whitespace,
     * 2. remove [title] if the trimmed string starts with it (exact, case-sensitive match),
     * 3. trim every leading and trailing character listed in [CHAPTER_TRIM_CHARS].
     *
     * Characters inside the name are never touched. The result is an empty string when
     * nothing but the title and trim characters was present.
     *
     * @param title prefix to strip from the start of the receiver.
     * @return the cleaned-up string.
     */
    fun String.sanitize(title: String): String =
        trim()
            .removePrefix(title)
            .trim(*CHAPTER_TRIM_CHARS)
}

View File

@@ -7,17 +7,21 @@ package suwayomi.tachidesk.manga.impl
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import com.google.common.cache.Cache
import com.google.common.cache.CacheBuilder
import eu.kanade.tachiyomi.source.model.SChapter import eu.kanade.tachiyomi.source.model.SChapter
import eu.kanade.tachiyomi.source.model.SManga import eu.kanade.tachiyomi.source.model.SManga
import eu.kanade.tachiyomi.source.online.HttpSource import eu.kanade.tachiyomi.source.online.HttpSource
import eu.kanade.tachiyomi.util.chapter.ChapterRecognition import eu.kanade.tachiyomi.util.chapter.ChapterRecognition
import eu.kanade.tachiyomi.util.chapter.ChapterSanitizer.sanitize
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.serialization.Serializable import kotlinx.serialization.Serializable
import mu.KotlinLogging import mu.KotlinLogging
import org.jetbrains.exposed.dao.id.EntityID import org.jetbrains.exposed.dao.id.EntityID
import org.jetbrains.exposed.sql.Op import org.jetbrains.exposed.sql.Op
import org.jetbrains.exposed.sql.ResultRow import org.jetbrains.exposed.sql.ResultRow
import org.jetbrains.exposed.sql.SortOrder import org.jetbrains.exposed.sql.SortOrder
import org.jetbrains.exposed.sql.SortOrder.ASC
import org.jetbrains.exposed.sql.SqlExpressionBuilder.inList import org.jetbrains.exposed.sql.SqlExpressionBuilder.inList
import org.jetbrains.exposed.sql.and import org.jetbrains.exposed.sql.and
import org.jetbrains.exposed.sql.batchInsert import org.jetbrains.exposed.sql.batchInsert
@@ -41,7 +45,11 @@ import suwayomi.tachidesk.manga.model.table.MangaTable
import suwayomi.tachidesk.manga.model.table.PageTable import suwayomi.tachidesk.manga.model.table.PageTable
import suwayomi.tachidesk.manga.model.table.toDataClass import suwayomi.tachidesk.manga.model.table.toDataClass
import suwayomi.tachidesk.server.serverConfig import suwayomi.tachidesk.server.serverConfig
import java.lang.Long
import java.time.Instant import java.time.Instant
import java.util.TreeSet
import java.util.concurrent.TimeUnit
import kotlin.math.max
object Chapter { object Chapter {
private val logger = KotlinLogging.logger { } private val logger = KotlinLogging.logger { }
@@ -109,7 +117,15 @@ object Chapter {
} }
} }
// Per-manga locks, keyed by manga id: fetchChapterList looks up (or creates) the Mutex
// for its manga id here and runs its work inside mutex.withLock.
// Entries expire 10 minutes after they were last accessed.
val map: Cache<Int, Mutex> =
CacheBuilder.newBuilder()
.expireAfterAccess(10, TimeUnit.MINUTES)
.build()
suspend fun fetchChapterList(mangaId: Int): List<SChapter> { suspend fun fetchChapterList(mangaId: Int): List<SChapter> {
val mutex = map.get(mangaId) { Mutex() }
val chapterList =
mutex.withLock {
val manga = getManga(mangaId) val manga = getManga(mangaId)
val source = getCatalogueSourceOrStub(manga.sourceId.toLong()) val source = getCatalogueSourceOrStub(manga.sourceId.toLong())
@@ -127,14 +143,20 @@ object Chapter {
(source as? HttpSource)?.prepareNewChapter(chapter, sManga) (source as? HttpSource)?.prepareNewChapter(chapter, sManga)
val chapterNumber = ChapterRecognition.parseChapterNumber(manga.title, chapter.name, chapter.chapter_number.toDouble()) val chapterNumber = ChapterRecognition.parseChapterNumber(manga.title, chapter.name, chapter.chapter_number.toDouble())
chapter.chapter_number = chapterNumber.toFloat() chapter.chapter_number = chapterNumber.toFloat()
chapter.name = chapter.name.sanitize(manga.title)
chapter.scanlator = chapter.scanlator?.ifBlank { null }
} }
val now = Instant.now().epochSecond val now = Instant.now().epochSecond
// Used to not set upload date of older chapters
// to a higher value than newer chapters
var maxSeenUploadDate = 0L
val chaptersInDb = val chaptersInDb =
transaction { transaction {
ChapterTable.select { ChapterTable.manga eq mangaId } ChapterTable.select { ChapterTable.manga eq mangaId }
.map { ChapterTable.toDataClass(it) } .map { ChapterTable.toDataClass(it) }
.toSet() .toList()
} }
val chaptersToInsert = mutableListOf<ChapterDataClass>() val chaptersToInsert = mutableListOf<ChapterDataClass>()
@@ -156,24 +178,83 @@ object Chapter {
) )
if (chapterEntry == null) { if (chapterEntry == null) {
chaptersToInsert.add(chapterData) val newChapterData =
if (chapterData.uploadDate == 0L) {
val altDateUpload = if (maxSeenUploadDate == 0L) now else maxSeenUploadDate
chapterData.copy(uploadDate = altDateUpload)
} else { } else {
chaptersToUpdate.add(chapterData) maxSeenUploadDate = max(maxSeenUploadDate, chapterData.uploadDate)
chapterData
}
chaptersToInsert.add(newChapterData)
} else {
val newChapterData =
if (chapterData.uploadDate == 0L) {
chapterData.copy(uploadDate = chapterEntry.uploadDate)
} else {
chapterData
}
chaptersToUpdate.add(newChapterData)
}
}
val deletedChapterNumbers = TreeSet<Float>()
val deletedReadChapterNumbers = TreeSet<Float>()
val deletedBookmarkedChapterNumbers = TreeSet<Float>()
val deletedDownloadedChapterNumbers = TreeSet<Float>()
val deletedChapterNumberDateFetchMap = mutableMapOf<Float, kotlin.Long>()
// clear any orphaned chapters that are in the db but whose url is no longer in `chapterList`
val dbChapterCount = chaptersInDb.count()
if (dbChapterCount > chapterList.size) { // we got some clean up due
val chapterUrls = chapterList.map { it.url }.toSet()
val chaptersIdsToDelete =
chaptersInDb.mapNotNull { dbChapter ->
if (!chapterUrls.contains(dbChapter.url)) {
if (dbChapter.read) deletedReadChapterNumbers.add(dbChapter.chapterNumber)
if (dbChapter.bookmarked) deletedBookmarkedChapterNumbers.add(dbChapter.chapterNumber)
if (dbChapter.downloaded) deletedDownloadedChapterNumbers.add(dbChapter.chapterNumber)
deletedChapterNumbers.add(dbChapter.chapterNumber)
deletedChapterNumberDateFetchMap[dbChapter.chapterNumber] = dbChapter.fetchedAt
dbChapter.id
} else {
null
}
}
transaction {
PageTable.deleteWhere { PageTable.chapter inList chaptersIdsToDelete }
ChapterTable.deleteWhere { ChapterTable.id inList chaptersIdsToDelete }
} }
} }
transaction { transaction {
if (chaptersToInsert.isNotEmpty()) { if (chaptersToInsert.isNotEmpty()) {
ChapterTable.batchInsert(chaptersToInsert) { ChapterTable.batchInsert(chaptersToInsert) { chapter ->
this[ChapterTable.url] = it.url this[ChapterTable.url] = chapter.url
this[ChapterTable.name] = it.name this[ChapterTable.name] = chapter.name
this[ChapterTable.date_upload] = it.uploadDate this[ChapterTable.date_upload] = chapter.uploadDate
this[ChapterTable.chapter_number] = it.chapterNumber this[ChapterTable.chapter_number] = chapter.chapterNumber
this[ChapterTable.scanlator] = it.scanlator this[ChapterTable.scanlator] = chapter.scanlator
this[ChapterTable.sourceOrder] = it.index this[ChapterTable.sourceOrder] = chapter.index
this[ChapterTable.fetchedAt] = it.fetchedAt this[ChapterTable.fetchedAt] = chapter.fetchedAt
this[ChapterTable.manga] = it.mangaId this[ChapterTable.manga] = chapter.mangaId
this[ChapterTable.realUrl] = it.realUrl this[ChapterTable.realUrl] = chapter.realUrl
this[ChapterTable.isRead] = false
this[ChapterTable.isBookmarked] = false
this[ChapterTable.isDownloaded] = false
// is recognized chapter number
if (chapter.chapterNumber >= 0f && chapter.chapterNumber in deletedChapterNumbers) {
this[ChapterTable.isRead] = chapter.chapterNumber in deletedReadChapterNumbers
this[ChapterTable.isBookmarked] = chapter.chapterNumber in deletedBookmarkedChapterNumbers
this[ChapterTable.isDownloaded] = chapter.chapterNumber in deletedDownloadedChapterNumbers
// Try to use the fetch date of the original entry to not pollute 'Updates' tab
deletedChapterNumberDateFetchMap[chapter.chapterNumber]?.let {
this[ChapterTable.fetchedAt] = it
}
}
} }
} }
@@ -203,40 +284,13 @@ object Chapter {
.orderBy(ChapterTable.sourceOrder to SortOrder.DESC).toList() .orderBy(ChapterTable.sourceOrder to SortOrder.DESC).toList()
} }
// clear any orphaned/duplicate chapters that are in the db but not in `chapterList`
val dbChapterCount = newChapters.count()
if (dbChapterCount > chapterList.size) { // we got some clean up due
val dbChapterList =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.orderBy(ChapterTable.url to ASC).toList()
}
val chapterUrls = chapterList.map { it.url }.toSet()
val chaptersIdsToDelete =
dbChapterList.mapIndexedNotNull { index, dbChapter ->
val isOrphaned = !chapterUrls.contains(dbChapter[ChapterTable.url])
val isDuplicate =
index < dbChapterList.lastIndex && dbChapter[ChapterTable.url] == dbChapterList[index + 1][ChapterTable.url]
val deleteChapter = isOrphaned || isDuplicate
if (deleteChapter) {
dbChapter[ChapterTable.id].value
} else {
null
}
}
transaction {
PageTable.deleteWhere { PageTable.chapter inList chaptersIdsToDelete }
ChapterTable.deleteWhere { ChapterTable.id inList chaptersIdsToDelete }
}
}
if (manga.inLibrary) { if (manga.inLibrary) {
downloadNewChapters(mangaId, numberOfCurrentChapters, newChapters) downloadNewChapters(mangaId, numberOfCurrentChapters, newChapters)
} }
chapterList
}
return chapterList return chapterList
} }

View File

@@ -22,6 +22,7 @@ import okio.buffer
import okio.gzip import okio.gzip
import okio.source import okio.source
import org.jetbrains.exposed.sql.and import org.jetbrains.exposed.sql.and
import org.jetbrains.exposed.sql.batchInsert
import org.jetbrains.exposed.sql.insert import org.jetbrains.exposed.sql.insert
import org.jetbrains.exposed.sql.insertAndGetId import org.jetbrains.exposed.sql.insertAndGetId
import org.jetbrains.exposed.sql.select import org.jetbrains.exposed.sql.select
@@ -294,23 +295,25 @@ object ProtoBackupImport : ProtoBackupBase() {
// insert chapter data // insert chapter data
val chaptersLength = chapters.size val chaptersLength = chapters.size
chapters.forEach { chapter -> ChapterTable.batchInsert(chapters) { chapter ->
ChapterTable.insert { this[ChapterTable.url] = chapter.url
it[url] = chapter.url this[ChapterTable.name] = chapter.name
it[name] = chapter.name if (chapter.date_upload == 0L) {
it[date_upload] = chapter.date_upload this[ChapterTable.date_upload] = chapter.date_fetch
it[chapter_number] = chapter.chapter_number } else {
it[scanlator] = chapter.scanlator this[ChapterTable.date_upload] = chapter.date_upload
it[sourceOrder] = chaptersLength - chapter.source_order
it[ChapterTable.manga] = mangaId
it[isRead] = chapter.read
it[lastPageRead] = chapter.last_page_read
it[isBookmarked] = chapter.bookmark
it[fetchedAt] = TimeUnit.MILLISECONDS.toSeconds(chapter.date_fetch)
} }
this[ChapterTable.chapter_number] = chapter.chapter_number
this[ChapterTable.scanlator] = chapter.scanlator
this[ChapterTable.sourceOrder] = chaptersLength - chapter.source_order
this[ChapterTable.manga] = mangaId
this[ChapterTable.isRead] = chapter.read
this[ChapterTable.lastPageRead] = chapter.last_page_read
this[ChapterTable.isBookmarked] = chapter.bookmark
this[ChapterTable.fetchedAt] = TimeUnit.MILLISECONDS.toSeconds(chapter.date_fetch)
} }
// insert categories // insert categories
@@ -350,7 +353,11 @@ object ProtoBackupImport : ProtoBackupBase() {
ChapterTable.insert { ChapterTable.insert {
it[url] = chapter.url it[url] = chapter.url
it[name] = chapter.name it[name] = chapter.name
if (chapter.date_upload == 0L) {
it[date_upload] = chapter.date_fetch
} else {
it[date_upload] = chapter.date_upload it[date_upload] = chapter.date_upload
}
it[chapter_number] = chapter.chapter_number it[chapter_number] = chapter.chapter_number
it[scanlator] = chapter.scanlator it[scanlator] = chapter.scanlator

View File

@@ -15,7 +15,7 @@ import java.io.InputStream
/* /*
* Base class for downloaded chapter files provider, example: Folder, Archive * Base class for downloaded chapter files provider, example: Folder, Archive
* */ */
abstract class ChaptersFilesProvider(val mangaId: Int, val chapterId: Int) : DownloadedFilesProvider { abstract class ChaptersFilesProvider(val mangaId: Int, val chapterId: Int) : DownloadedFilesProvider {
abstract fun getImageImpl(index: Int): Pair<InputStream, String> abstract fun getImageImpl(index: Int): Pair<InputStream, String>

View File

@@ -56,7 +56,7 @@ data class ChapterDataClass(
name = sChapter.name, name = sChapter.name,
uploadDate = sChapter.date_upload, uploadDate = sChapter.date_upload,
chapterNumber = sChapter.chapter_number, chapterNumber = sChapter.chapter_number,
scanlator = sChapter.scanlator ?: "", scanlator = sChapter.scanlator,
index = index, index = index,
fetchedAt = fetchedAt, fetchedAt = fetchedAt,
realUrl = realUrl, realUrl = realUrl,

View File

@@ -0,0 +1,21 @@
package suwayomi.tachidesk.server.database.migration
/*
* Copyright (C) Contributors to the Suwayomi project
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import de.neonew.exposed.migrations.helpers.SQLMigration
/**
 * Migration that backfills the upload date of chapters that do not have one.
 *
 * Every CHAPTER row whose DATE_UPLOAD is 0 gets DATE_UPLOAD set to FETCHED_AT * 1000;
 * rows with a non-zero DATE_UPLOAD are left untouched.
 *
 * NOTE(review): the factor of 1000 presumably converts FETCHED_AT (seconds) into the
 * milliseconds used by DATE_UPLOAD - confirm against the column definitions.
 */
@Suppress("ClassName", "unused")
class M0030_FixDateUpload : SQLMigration() {
// SQL statement supplied to SQLMigration; the marker below is an IDE language-injection hint.
// language=h2
override val sql =
"""
UPDATE CHAPTER
SET DATE_UPLOAD = (FETCHED_AT * 1000)
WHERE DATE_UPLOAD = 0;
""".trimIndent()
}