WebView based cloudflare interceptor (#456)

* WebView based cloudflare interceptor

ported https://github.com/vvanglro/cf-clearance to kotlin

* code clean up

* Forgot to commit these

* Get ResolveWithWebView working
1. Make sure to .use all closeable resources
2. Use 10 seconds instead of 1 second for waiting for cloudflare(this was the most probable issue)
3. Use Extension UA when possible
4. Minor cleanup of logging

* rewrite and refactor

Co-authored-by: Syer10 <syer10@users.noreply.github.com>
This commit is contained in:
Aria Moradi
2022-12-04 12:08:54 +03:30
committed by GitHub
parent 5f8e74f017
commit f47dc6b9de
13 changed files with 695 additions and 50 deletions

View File

@@ -39,6 +39,7 @@ class NetworkHelper(context: Context) {
.cookieJar(cookieManager)
.connectTimeout(30, TimeUnit.SECONDS)
.readTimeout(30, TimeUnit.SECONDS)
.callTimeout(2, TimeUnit.MINUTES)
.addInterceptor(UserAgentInterceptor())
if (serverConfig.debugLogsEnabled) {

View File

@@ -62,7 +62,7 @@ suspend fun Call.await(): Response {
object : Callback {
override fun onResponse(call: Call, response: Response) {
if (!response.isSuccessful) {
continuation.resumeWithException(Exception("HTTP error ${response.code}"))
continuation.resumeWithException(HttpException(response.code))
return
}
@@ -94,7 +94,7 @@ fun Call.asObservableSuccess(): Observable<Response> {
.doOnNext { response ->
if (!response.isSuccessful) {
response.close()
throw Exception("HTTP error ${response.code}")
throw HttpException(response.code)
}
}
}
@@ -136,3 +136,5 @@ inline fun <reified T> Response.parseAs(): T {
return json.decodeFromString(responseBody)
}
}
class HttpException(val code: Int) : IllegalStateException("HTTP error $code")

View File

@@ -1,19 +1,24 @@
package eu.kanade.tachiyomi.network.interceptor
import com.gargoylesoftware.htmlunit.BrowserVersion
import com.gargoylesoftware.htmlunit.WebClient
import com.gargoylesoftware.htmlunit.html.HtmlPage
import com.microsoft.playwright.Browser
import com.microsoft.playwright.BrowserType.LaunchOptions
import com.microsoft.playwright.Page
import com.microsoft.playwright.Playwright
import eu.kanade.tachiyomi.network.NetworkHelper
import eu.kanade.tachiyomi.network.interceptor.CFClearance.resolveWithWebView
import mu.KotlinLogging
import okhttp3.Cookie
import okhttp3.HttpUrl
import okhttp3.Interceptor
import okhttp3.Request
import okhttp3.Response
import suwayomi.tachidesk.server.ServerConfig
import suwayomi.tachidesk.server.serverConfig
import uy.kohesive.injekt.injectLazy
import java.io.IOException
import kotlin.time.Duration.Companion.seconds
import kotlin.time.DurationUnit
// from TachiWeb-Server
class CloudflareInterceptor : Interceptor {
private val logger = KotlinLogging.logger {}
@@ -25,20 +30,22 @@ class CloudflareInterceptor : Interceptor {
logger.trace { "CloudflareInterceptor is being used." }
val response = chain.proceed(originalRequest)
val originalResponse = chain.proceed(chain.request())
// Check if Cloudflare anti-bot is on
if (response.code != 503 || response.header("Server") !in SERVER_CHECK) {
return response
if (!(originalResponse.code in ERROR_CODES && originalResponse.header("Server") in SERVER_CHECK)) {
return originalResponse
}
logger.debug { "Cloudflare anti-bot is on, CloudflareInterceptor is kicking in..." }
return try {
response.close()
originalResponse.close()
network.cookies.remove(originalRequest.url.toUri())
chain.proceed(resolveChallenge(response))
val request = resolveWithWebView(originalRequest)
chain.proceed(request)
} catch (e: Exception) {
// Because OkHttp's enqueue only handles IOExceptions, wrap the exception so that
// we don't crash the entire app
@@ -46,65 +53,148 @@ class CloudflareInterceptor : Interceptor {
}
}
private fun resolveChallenge(response: Response): Request {
val browserVersion = BrowserVersion.BrowserVersionBuilder(BrowserVersion.BEST_SUPPORTED)
.setUserAgent(response.request.header("User-Agent") ?: BrowserVersion.BEST_SUPPORTED.userAgent)
.build()
val convertedCookies = WebClient(browserVersion).use { webClient ->
webClient.options.isThrowExceptionOnFailingStatusCode = false
webClient.options.isThrowExceptionOnScriptError = false
webClient.getPage<HtmlPage>(response.request.url.toString())
webClient.waitForBackgroundJavaScript(10000)
// Challenge solved, process cookies
webClient.cookieManager.cookies.filter {
// Only include Cloudflare cookies
it.name.startsWith("__cf") || it.name.startsWith("cf_")
}.map {
// Convert cookies -> OkHttp format
Cookie.Builder()
.domain(it.domain.removePrefix("."))
.expiresAt(it.expires?.time ?: Long.MAX_VALUE)
.name(it.name)
.path(it.path)
.value(it.value).apply {
if (it.isHttpOnly) httpOnly()
if (it.isSecure) secure()
}.build()
companion object {
private val ERROR_CODES = listOf(403, 503)
private val SERVER_CHECK = arrayOf("cloudflare-nginx", "cloudflare")
private val COOKIE_NAMES = listOf("cf_clearance")
}
}
/*
* This class is ported from https://github.com/vvanglro/cf-clearance
* The original code is licensed under Apache 2.0
*/
object CFClearance {
private val logger = KotlinLogging.logger {}
private val network: NetworkHelper by injectLazy()
fun resolveWithWebView(originalRequest: Request): Request {
val url = originalRequest.url.toString()
logger.debug { "resolveWithWebView($url)" }
val cookies = Playwright.create().use { playwright ->
playwright.chromium().launch(
LaunchOptions()
.setHeadless(false)
.apply {
if (serverConfig.socksProxyEnabled) {
setProxy("socks5://${serverConfig.socksProxyHost}:${serverConfig.socksProxyPort}")
}
}
).use { browser ->
val userAgent = originalRequest.header("User-Agent")
if (userAgent != null) {
browser.newContext(Browser.NewContextOptions().setUserAgent(userAgent)).use { browserContext ->
browserContext.newPage().use { getCookies(it, url) }
}
} else {
browser.newPage().use { getCookies(it, url) }
}
}
}
// Copy cookies to cookie store
convertedCookies.forEach {
cookies.groupBy { it.domain }.forEach { (domain, cookies) ->
network.cookies.addAll(
HttpUrl.Builder()
url = HttpUrl.Builder()
.scheme("http")
.host(it.domain)
.host(domain)
.build(),
listOf(it)
cookies = cookies
)
}
// Merge new and existing cookies for this request
// Find the cookies that we need to merge into this request
val convertedForThisRequest = convertedCookies.filter {
it.matches(response.request.url)
val convertedForThisRequest = cookies.filter {
it.matches(originalRequest.url)
}
// Extract cookies from current request
val existingCookies = Cookie.parseAll(
response.request.url,
response.request.headers
originalRequest.url,
originalRequest.headers
)
// Filter out existing values of cookies that we are about to merge in
val filteredExisting = existingCookies.filter { existing ->
convertedForThisRequest.none { converted -> converted.name == existing.name }
}
logger.trace { "Existing cookies" }
logger.trace { existingCookies.joinToString("; ") }
val newCookies = filteredExisting + convertedForThisRequest
return response.request.newBuilder()
.header("Cookie", newCookies.map { it.toString() }.joinToString("; "))
logger.trace { "New cookies" }
logger.trace { newCookies.joinToString("; ") }
return originalRequest.newBuilder()
.header("Cookie", newCookies.joinToString("; ") { "${it.name}=${it.value}" })
.build()
}
companion object {
private val SERVER_CHECK = arrayOf("cloudflare-nginx", "cloudflare")
private val COOKIE_NAMES = listOf("cf_clearance")
private fun getCookies(page: Page, url: String): List<Cookie> {
applyStealthInitScripts(page)
page.navigate(url)
val challengeResolved = waitForChallengeResolve(page)
return if (challengeResolved) {
val cookies = page.context().cookies()
logger.debug {
val userAgent = page.evaluate("() => {return navigator.userAgent}")
"Playwright User-Agent is $userAgent"
}
// Convert PlayWright cookies to OkHttp cookies
cookies.map {
Cookie.Builder()
.domain(it.domain.removePrefix("."))
.expiresAt(it.expires?.times(1000)?.toLong() ?: Long.MAX_VALUE)
.name(it.name)
.path(it.path)
.value(it.value).apply {
if (it.httpOnly) httpOnly()
if (it.secure) secure()
}.build()
}
} else {
logger.debug { "Cloudflare challenge failed to resolve" }
throw CloudflareBypassException()
}
}
// ref: https://github.com/vvanglro/cf-clearance/blob/44124a8f06d8d0ecf2bf558a027082ff88dab435/cf_clearance/stealth.py#L18
private val stealthInitScripts by lazy {
arrayOf(
ServerConfig::class.java.getResource("/cloudflare-js/canvas.fingerprinting.js")!!.readText(),
ServerConfig::class.java.getResource("/cloudflare-js/chrome.global.js")!!.readText(),
ServerConfig::class.java.getResource("/cloudflare-js/emulate.touch.js")!!.readText(),
ServerConfig::class.java.getResource("/cloudflare-js/navigator.permissions.js")!!.readText(),
ServerConfig::class.java.getResource("/cloudflare-js/navigator.webdriver.js")!!.readText(),
ServerConfig::class.java.getResource("/cloudflare-js/chrome.runtime.js")!!.readText(),
ServerConfig::class.java.getResource("/cloudflare-js/chrome.plugin.js")!!.readText()
)
}
// ref: https://github.com/vvanglro/cf-clearance/blob/44124a8f06d8d0ecf2bf558a027082ff88dab435/cf_clearance/stealth.py#L76
private fun applyStealthInitScripts(page: Page) {
for (script in stealthInitScripts) {
page.addInitScript(script)
}
}
// ref: https://github.com/vvanglro/cf-clearance/blob/44124a8f06d8d0ecf2bf558a027082ff88dab435/cf_clearance/retry.py#L21
private fun waitForChallengeResolve(page: Page): Boolean {
// sometimes the user has to solve the captcha challenge manually, potentially wait a long time
val timeoutSeconds = 120
repeat(timeoutSeconds) {
page.waitForTimeout(1.seconds.toDouble(DurationUnit.MILLISECONDS))
val success = try {
page.querySelector("#challenge-form") == null
} catch (e: Exception) {
logger.debug(e) { "query Error" }
false
}
if (success) return true
}
return false
}
private class CloudflareBypassException : Exception()
}