feat: add Dead Links management module and internal browser screen

This commit is contained in:
Bruno Charest 2026-04-23 16:04:44 -04:00
parent 2198324c2d
commit b0a6e8100b
10 changed files with 398 additions and 167 deletions

View File

@ -167,6 +167,9 @@ dependencies {
// JSoup for HTML parsing (metadata extraction)
implementation(libs.jsoup)
// Readability for better article extraction
implementation("net.dankito.readability4j:readability4j:1.0.8")
// Biometric
implementation(libs.androidx.biometric)

View File

@ -7,6 +7,14 @@ import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.safety.Safelist
import net.dankito.readability4j.Readability4J
import android.content.Context
import dagger.hilt.android.qualifiers.ApplicationContext
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlinx.coroutines.launch
import kotlinx.coroutines.MainScope
import kotlinx.coroutines.delay
import kotlin.coroutines.resume
import javax.inject.Inject
import javax.inject.Singleton
@ -24,53 +32,19 @@ data class ReadableArticle(
)
/**
* Extracteur d'articles style Readability basé sur JSoup.
* Extracteur d'articles basé sur Readability4J (portage de Mozilla Readability).
* Extrait le contenu principal d'une page web en supprimant navigation, pubs, sidebars, etc.
*/
@Singleton
class ArticleExtractor @Inject constructor() {
class ArticleExtractor @Inject constructor(
@ApplicationContext private val context: Context
) {
companion object {
private const val TAG = "ArticleExtractor"
private const val TIMEOUT_MS = 15000
private const val WORDS_PER_MINUTE = 200
// Éléments à supprimer systématiquement
private val REMOVE_SELECTORS = listOf(
"script", "style", "noscript", "iframe", "object", "embed",
"nav", "header:not(article header)", "footer:not(article footer)",
".sidebar", "#sidebar", ".widget", ".ad", ".ads", ".advert",
".advertisement", "[class*=advert]", "[id*=advert]",
".social-share", ".share-buttons", ".sharing",
".comments", "#comments", ".comment-section",
".related-posts", ".related-articles", ".recommended",
".newsletter", ".subscribe", ".popup", ".modal",
".cookie-banner", ".cookie-notice", ".gdpr",
".breadcrumb", ".breadcrumbs", ".pagination",
".menu", ".navigation", "#navigation",
"[role=navigation]", "[role=banner]", "[role=complementary]",
".toc", "#toc", ".table-of-contents"
)
// Sélecteurs pour trouver le contenu principal (ordre de priorité)
private val CONTENT_SELECTORS = listOf(
"article",
"[role=main]",
"main",
".post-content",
".article-content",
".entry-content",
".content-body",
".article-body",
".post-body",
".story-body",
"#article-body",
"#content",
".content",
".post",
".article"
)
// Safelist HTML permise dans le contenu nettoyé
private val READER_SAFELIST = Safelist.relaxed()
.addTags("figure", "figcaption", "picture", "source", "video", "audio")
@ -91,14 +65,98 @@ class ArticleExtractor @Inject constructor() {
val doc = Jsoup.connect(url)
.timeout(TIMEOUT_MS)
.userAgent("Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36")
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
.header("Accept-Language", "en-US,en;q=0.5")
.followRedirects(true)
.maxBodySize(5 * 1024 * 1024) // 5 MB max
.get()
extractFromDocument(doc, url)
val article = extractFromDocument(doc, url)
if (article == null || article.wordCount < 50) {
// Si l'extraction JSoup échoue ou renvoie très peu de texte (ex: Cloudflare protection),
// on essaie avec WebView
Log.d(TAG, "JSoup n'a pas pu extraire assez de contenu, essai avec WebView...")
return@withContext extractWithWebView(url)
}
article
} catch (e: Exception) {
Log.e(TAG, "Erreur extraction article pour $url", e)
null
extractWithWebView(url)
}
}
/**
 * Fallback extractor: loads [url] in an off-screen WebView so that the page's JavaScript
 * runs (anti-bot interstitials, single-page apps), captures the rendered DOM and feeds it
 * to [extractFromDocument].
 *
 * The WebView is created, driven and destroyed on the main thread. The HTML parsing and
 * article extraction are moved to [Dispatchers.Default] so they do not block the UI.
 *
 * @param url page to load.
 * @return the extracted article, or `null` when the WebView could not be created, the page
 *   did not produce a DOM within the overall timeout, or extraction itself returned `null`.
 */
private suspend fun extractWithWebView(url: String): ReadableArticle? {
    // Quiet period after the last onPageFinished before the DOM is captured.
    val settleDelayMs = 3_000L
    // Upper bound for the whole load + settle + capture sequence.
    val overallTimeoutMs = 15_000L

    val renderedHtml: String = withContext(Dispatchers.Main) {
        // On timeout the continuation is cancelled, which triggers the cleanup below.
        kotlinx.coroutines.withTimeoutOrNull(overallTimeoutMs) {
            suspendCancellableCoroutine<String?> { continuation ->
                // A plain Handler is used instead of View.postDelayed because the WebView is
                // never attached to a window.
                val handler = android.os.Handler(android.os.Looper.getMainLooper())
                var webView: android.webkit.WebView? = null
                var finished = false

                // Single exit point: releases the WebView and resumes at most once.
                // Always invoked on the main thread.
                fun finish(html: String?) {
                    if (finished) return
                    finished = true
                    handler.removeCallbacksAndMessages(null)
                    webView?.apply {
                        stopLoading()
                        destroy()
                    }
                    webView = null
                    if (continuation.isActive) continuation.resume(html)
                }

                val capture = Runnable {
                    val current = webView
                    if (finished || current == null) return@Runnable
                    current.evaluateJavascript(
                        "(function() { return document.documentElement.outerHTML; })();"
                    ) { raw ->
                        // evaluateJavascript returns the value as a JSON-encoded string.
                        val decoded = try {
                            if (raw != null && raw != "null") {
                                org.json.JSONTokener(raw).nextValue() as? String ?: raw
                            } else {
                                ""
                            }
                        } catch (e: Exception) {
                            raw ?: ""
                        }
                        finish(decoded)
                    }
                }

                // The cancellation handler can run on any thread; WebView calls must be
                // made on the main thread, so the cleanup is posted.
                continuation.invokeOnCancellation { handler.post { finish(null) } }

                try {
                    val created = android.webkit.WebView(context).apply {
                        settings.javaScriptEnabled = true
                        settings.domStorageEnabled = true
                        settings.loadsImagesAutomatically = false // images are not needed to read the DOM
                        settings.userAgentString = "Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36"
                    }
                    webView = created
                    created.webViewClient = object : android.webkit.WebViewClient() {
                        override fun onPageFinished(view: android.webkit.WebView?, pageUrl: String?) {
                            // onPageFinished can fire several times (redirects, challenge pages):
                            // restart the settle timer so the last page is the one captured.
                            handler.removeCallbacks(capture)
                            handler.postDelayed(capture, settleDelayMs)
                        }
                    }
                    created.loadUrl(url)
                } catch (e: Exception) {
                    android.util.Log.w("ArticleExtractor", "WebView fallback failed for $url", e)
                    finish(null)
                }
            }
        }
    } ?: return null

    return withContext(Dispatchers.Default) {
        extractFromDocument(Jsoup.parse(renderedHtml, url), url)
    }
}
@ -107,29 +165,27 @@ class ArticleExtractor @Inject constructor() {
*/
fun extractFromDocument(doc: Document, baseUrl: String): ReadableArticle? {
return try {
// Extraire les métadonnées
val title = extractTitle(doc)
val author = extractAuthor(doc)
// Extraire les métadonnées avec JSoup (plus robuste que Readability pour le site_name/image)
val siteName = extractSiteName(doc, baseUrl)
val leadImage = extractLeadImage(doc, baseUrl)
// Nettoyer le document
val cleanDoc = doc.clone()
removeUnwantedElements(cleanDoc)
// Utiliser Readability4J pour extraire l'article principal
val readability = Readability4J(baseUrl, doc.html())
val article = readability.parse()
// Trouver le contenu principal
val mainContent = findMainContent(cleanDoc)
?: return null
if (article.content == null || article.textContent == null) {
return null
}
// Nettoyer le HTML du contenu principal
val cleanHtml = Jsoup.clean(
mainContent.html(),
article.content!!,
baseUrl,
READER_SAFELIST
)
// Calculer les stats
val textContent = Jsoup.parse(cleanHtml).text()
val textContent = article.textContent!!
val wordCount = textContent.split(Regex("\\s+")).filter { it.isNotBlank() }.size
val readingTime = maxOf(1, wordCount / WORDS_PER_MINUTE)
@ -139,8 +195,8 @@ class ArticleExtractor @Inject constructor() {
}
ReadableArticle(
title = title ?: "Sans titre",
author = author,
title = article.title ?: extractTitle(doc) ?: "Sans titre",
author = article.byline ?: extractAuthor(doc),
siteName = siteName,
content = cleanHtml,
leadImage = leadImage,
@ -214,110 +270,6 @@ class ArticleExtractor @Inject constructor() {
return null
}
/**
 * Strips from [doc], in place, every element matched by one of the REMOVE_SELECTORS
 * CSS queries.
 */
private fun removeUnwantedElements(doc: Document) {
    REMOVE_SELECTORS.forEach { cssQuery ->
        try {
            val matches = doc.select(cssQuery)
            matches.remove()
        } catch (_: Exception) {
            // Best effort: a selector that fails is skipped and the others still apply.
        }
    }
}
/**
 * Picks the element most likely to hold the main content of [doc].
 *
 * 1. The CONTENT_SELECTORS queries are tried in order; for each one the match with the
 *    longest text wins, provided that text is longer than 200 characters.
 * 2. Otherwise every `<div>` and `<section>` is scored with [scoreElement] and the first
 *    element reaching the highest strictly positive score is returned.
 *
 * Falls back to the document body when no candidate qualifies.
 */
private fun findMainContent(doc: Document): Element? {
    // Step 1: well-known content containers, in priority order.
    CONTENT_SELECTORS.forEach { selector ->
        doc.select(selector)
            .maxByOrNull { it.text().length }
            ?.takeIf { it.text().length > 200 }
            ?.let { return it }
    }

    // Step 2: heuristic scoring of generic containers.
    val blocks = doc.select("div, section")
    if (blocks.isEmpty()) return doc.body()

    val (winner, winnerScore) = blocks
        .map { it to scoreElement(it) }
        .maxByOrNull { it.second }
        ?: return doc.body()

    return if (winnerScore > 0.0) winner else doc.body()
}
/**
 * Scores [element] by how likely it is to contain the main article content.
 * Inspired by Mozilla's Readability algorithm.
 *
 * The score grows with the element's own text length, direct paragraphs and content tags
 * (images, code, quotes, sub-headings). It is then scaled down for link-heavy or very short
 * elements, and scaled up or down according to hints found in the class and id attributes.
 *
 * @return a non-negative score; higher means more likely to be the main content.
 */
private fun scoreElement(element: Element): Double {
    var score = 0.0

    // Text owned directly by the element (not by its children): more text, more likely.
    val text = element.ownText()
    val textLength = text.length
    score += textLength * 0.1

    // Paragraphs that are direct children or one level below.
    val paragraphs = element.select("> p, > div > p")
    score += paragraphs.size * 10.0

    // Content-bearing tags.
    score += element.select("img").size * 3.0
    score += element.select("pre, code").size * 5.0
    score += element.select("blockquote").size * 3.0
    score += element.select("h2, h3, h4").size * 5.0

    // Link density penalty: a block made mostly of links is probably navigation.
    val links = element.select("a")
    val linkTextLength = links.sumOf { it.text().length }
    val totalTextLength = element.text().length
    if (totalTextLength > 0) {
        val linkDensity = linkTextLength.toDouble() / totalTextLength
        if (linkDensity > 0.5) {
            score *= 0.2 // strong penalty
        } else if (linkDensity > 0.3) {
            score *= 0.5
        }
    }

    // Penalty for very short elements.
    if (totalTextLength < 100) {
        score *= 0.1
    }

    // Bonus for class/id names that suggest article content.
    val classId = "${element.className()} ${element.id()}".lowercase()
    if (classId.contains("article") || classId.contains("content") ||
        classId.contains("post") || classId.contains("entry") ||
        classId.contains("text") || classId.contains("body")) {
        score *= 1.5
    }

    // "ad" is matched as a whole token (or an "advert…" prefix) rather than as a substring:
    // a bare substring test also hits unrelated names such as "lead", "thread", "reader"
    // or "download" and would wrongly penalise real content blocks.
    val nameTokens = classId.split('-', '_', ' ').filter { it.isNotEmpty() }
    val looksLikeAd = nameTokens.any { it == "ad" || it == "ads" || it.startsWith("advert") }

    // Penalty for class/id names that suggest page chrome.
    if (classId.contains("comment") || classId.contains("sidebar") ||
        classId.contains("footer") || classId.contains("header") ||
        classId.contains("nav") || classId.contains("menu") ||
        looksLikeAd || classId.contains("widget")) {
        score *= 0.2
    }

    return score
}
private fun resolveUrl(url: String, baseUrl: String): String {
return when {
url.startsWith("http") -> url

View File

@ -0,0 +1,136 @@
package com.shaarit.presentation.browser
import android.annotation.SuppressLint
import android.graphics.Bitmap
import android.webkit.WebChromeClient
import android.webkit.WebResourceRequest
import android.webkit.WebView
import android.webkit.WebViewClient
import androidx.compose.animation.AnimatedVisibility
import androidx.compose.foundation.layout.*
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.ArrowBack
import androidx.compose.material.icons.filled.Close
import androidx.compose.material.icons.filled.Refresh
import androidx.compose.material3.*
import androidx.compose.runtime.*
import androidx.compose.ui.Modifier
import androidx.compose.ui.text.style.TextOverflow
import androidx.compose.ui.viewinterop.AndroidView
import java.net.URLDecoder
/**
 * Full-screen in-app browser built around an Android [WebView].
 *
 * The top bar shows the page title and the initial URL, a close button, a "back in web
 * history" button and a reload button. A linear progress bar is visible while a page loads.
 *
 * @param encodedUrl URL to open, URL-encoded (UTF-8) by the caller. If it cannot be decoded
 *   it is used as received.
 * @param onNavigateBack invoked when the user taps the close button.
 */
@OptIn(ExperimentalMaterial3Api::class)
@SuppressLint("SetJavaScriptEnabled")
@Composable
fun InternalBrowserScreen(
    encodedUrl: String,
    onNavigateBack: () -> Unit
) {
    // URLDecoder throws IllegalArgumentException on a malformed "%" sequence; fall back to
    // the raw value instead of crashing the screen.
    // NOTE(review): if the navigation library already decodes route arguments, this is a
    // second decode — confirm against the caller.
    val url = remember(encodedUrl) {
        runCatching { URLDecoder.decode(encodedUrl, "UTF-8") }.getOrDefault(encodedUrl)
    }
    var pageTitle by remember { mutableStateOf(url) }
    var isLoading by remember { mutableStateOf(true) }
    var progress by remember { mutableFloatStateOf(0f) }
    var webView by remember { mutableStateOf<WebView?>(null) }
    // WebView.canGoBack() is not observable by Compose, so it is mirrored into state from
    // the WebViewClient callbacks; otherwise the back button would never get enabled.
    var canGoBack by remember { mutableStateOf(false) }

    // Release the WebView when the screen leaves the composition.
    DisposableEffect(Unit) {
        onDispose {
            webView?.apply {
                stopLoading()
                destroy()
            }
        }
    }

    Scaffold(
        topBar = {
            TopAppBar(
                title = {
                    Column {
                        Text(
                            text = pageTitle,
                            style = MaterialTheme.typography.titleMedium,
                            maxLines = 1,
                            overflow = TextOverflow.Ellipsis
                        )
                        Text(
                            text = url,
                            style = MaterialTheme.typography.bodySmall,
                            color = MaterialTheme.colorScheme.onSurfaceVariant,
                            maxLines = 1,
                            overflow = TextOverflow.Ellipsis
                        )
                    }
                },
                navigationIcon = {
                    IconButton(onClick = onNavigateBack) {
                        Icon(Icons.Default.Close, contentDescription = "Fermer")
                    }
                },
                actions = {
                    IconButton(
                        onClick = {
                            if (webView?.canGoBack() == true) {
                                webView?.goBack()
                            }
                        },
                        enabled = canGoBack
                    ) {
                        Icon(Icons.Default.ArrowBack, contentDescription = "Retour")
                    }
                    IconButton(onClick = { webView?.reload() }) {
                        Icon(Icons.Default.Refresh, contentDescription = "Actualiser")
                    }
                }
            )
        }
    ) { paddingValues ->
        Column(
            modifier = Modifier
                .fillMaxSize()
                .padding(paddingValues)
        ) {
            AnimatedVisibility(visible = isLoading) {
                LinearProgressIndicator(
                    progress = progress,
                    modifier = Modifier.fillMaxWidth(),
                    color = MaterialTheme.colorScheme.primary
                )
            }

            AndroidView(
                modifier = Modifier.fillMaxSize(),
                factory = { context ->
                    WebView(context).apply {
                        settings.javaScriptEnabled = true
                        settings.domStorageEnabled = true
                        settings.loadsImagesAutomatically = true

                        webViewClient = object : WebViewClient() {
                            override fun onPageStarted(view: WebView?, url: String?, favicon: Bitmap?) {
                                super.onPageStarted(view, url, favicon)
                                isLoading = true
                            }

                            override fun onPageFinished(view: WebView?, url: String?) {
                                super.onPageFinished(view, url)
                                isLoading = false
                                canGoBack = view?.canGoBack() == true
                                view?.title?.let { if (it.isNotBlank()) pageTitle = it }
                            }

                            override fun doUpdateVisitedHistory(view: WebView?, url: String?, isReload: Boolean) {
                                super.doUpdateVisitedHistory(view, url, isReload)
                                // Called whenever the history changes: keep the button state in sync.
                                canGoBack = view?.canGoBack() == true
                            }
                        }

                        webChromeClient = object : WebChromeClient() {
                            override fun onProgressChanged(view: WebView?, newProgress: Int) {
                                super.onProgressChanged(view, newProgress)
                                // WebView reports 0..100; the indicator expects 0f..1f.
                                progress = newProgress / 100f
                            }

                            override fun onReceivedTitle(view: WebView?, title: String?) {
                                super.onReceivedTitle(view, title)
                                if (!title.isNullOrBlank()) {
                                    pageTitle = title
                                }
                            }
                        }

                        loadUrl(url)
                        webView = this
                    }
                },
                update = {
                    webView = it
                }
            )
        }
    }
}

View File

@ -14,6 +14,9 @@ import androidx.compose.material.icons.filled.Delete
import androidx.compose.material.icons.filled.CheckCircle
import androidx.compose.material.icons.filled.Close
import androidx.compose.material.icons.filled.Refresh
import androidx.compose.material.icons.filled.Folder
import androidx.compose.material.icons.filled.Link
import androidx.compose.foundation.clickable
import androidx.compose.material3.*
import androidx.compose.runtime.*
import androidx.compose.ui.Alignment
@ -43,6 +46,8 @@ fun DeadLinksScreen(
val selectedLinkIds by viewModel.selectedLinkIds.collectAsState()
val isSelectionMode by viewModel.isSelectionMode.collectAsState()
val testResults by viewModel.linkTestResults.collectAsState()
val collections by viewModel.collections.collectAsState()
var showAddToCollectionDialog by remember { mutableStateOf(false) }
Box(
modifier = Modifier
@ -83,9 +88,25 @@ fun DeadLinksScreen(
actions = {
if (isSelectionMode) {
IconButton(
onClick = {
viewModel.excludeSelectedFromHealthCheck()
}
onClick = { showAddToCollectionDialog = true }
) {
Icon(
imageVector = Icons.Default.Folder,
contentDescription = "Ajouter à une collection",
tint = MaterialTheme.colorScheme.primary
)
}
IconButton(
onClick = { viewModel.verifySelectedLinks() }
) {
Icon(
imageVector = Icons.Default.Link,
contentDescription = "Validation de fonctionnement des liens",
tint = MaterialTheme.colorScheme.primary
)
}
IconButton(
onClick = { viewModel.excludeSelectedFromHealthCheck() }
) {
Icon(
imageVector = Icons.Default.CheckCircle,
@ -103,6 +124,43 @@ fun DeadLinksScreen(
},
containerColor = Color.Transparent
) { paddingValues ->
if (showAddToCollectionDialog) {
val regularCollections = remember(collections) { collections.filter { !it.isSmart } }
AlertDialog(
onDismissRequest = { showAddToCollectionDialog = false },
title = { Text("Ajouter à une collection") },
text = {
Column(modifier = Modifier.fillMaxWidth()) {
if (regularCollections.isEmpty()) {
Text("Aucune collection disponible.")
} else {
regularCollections.forEach { c ->
Row(
modifier = Modifier
.fillMaxWidth()
.clickable {
viewModel.addLinksToCollection(c.id, selectedLinkIds)
showAddToCollectionDialog = false
}
.padding(vertical = 12.dp),
verticalAlignment = Alignment.CenterVertically
) {
Text(c.icon)
Spacer(modifier = Modifier.width(12.dp))
Text(c.name)
}
}
}
}
},
confirmButton = {
TextButton(onClick = { showAddToCollectionDialog = false }) {
Text("Fermer")
}
}
)
}
Box(
modifier = Modifier
.padding(paddingValues)

View File

@ -7,24 +7,35 @@ import androidx.paging.cachedIn
import com.shaarit.data.local.dao.LinkDao
import com.shaarit.domain.model.ShaarliLink
import com.shaarit.domain.repository.LinkRepository
import com.shaarit.data.local.dao.CollectionDao
import com.shaarit.data.local.entity.CollectionLinkCrossRef
import com.shaarit.data.sync.SyncManager
import com.shaarit.core.storage.TokenManager
import dagger.hilt.android.lifecycle.HiltViewModel
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.stateIn
import kotlinx.coroutines.launch
import javax.inject.Inject
@HiltViewModel
class DeadLinksViewModel @Inject constructor(
private val linkRepository: LinkRepository,
private val linkDao: LinkDao
private val linkDao: LinkDao,
private val collectionDao: CollectionDao,
private val syncManager: SyncManager,
private val tokenManager: TokenManager
) : ViewModel() {
val pagedDeadLinks: Flow<PagingData<ShaarliLink>> =
linkRepository.getDeadLinksStream()
.cachedIn(viewModelScope)
val collections = collectionDao.getAllCollections()
.stateIn(viewModelScope, kotlinx.coroutines.flow.SharingStarted.WhileSubscribed(5_000), emptyList())
private val _selectedLinkIds = MutableStateFlow<Set<Int>>(emptySet())
val selectedLinkIds: StateFlow<Set<Int>> = _selectedLinkIds.asStateFlow()
@ -60,6 +71,36 @@ class DeadLinksViewModel @Inject constructor(
}
}
/**
 * Loads the currently selected links from the DAO, calls `verifyLink` for each of them,
 * then clears the selection. Does nothing when no link is selected.
 */
fun verifySelectedLinks() {
    viewModelScope.launch {
        val selectedIds = _selectedLinkIds.value.toList()
        if (selectedIds.isEmpty()) return@launch

        linkDao.getLinksByIds(selectedIds).forEach { selected ->
            verifyLink(selected.id, selected.url)
        }
        clearSelection()
    }
}
/**
 * Adds every link in [linkIds] to the collection [collectionId], marks the collections
 * configuration as dirty, requests a sync and clears the selection.
 *
 * Insertion is best effort: a link that fails to be added is logged and skipped so the
 * remaining links are still processed. Coroutine cancellation is never swallowed.
 *
 * @param collectionId target collection.
 * @param linkIds ids of the links to add; the call is a no-op when empty.
 */
fun addLinksToCollection(collectionId: Long, linkIds: Set<Int>) {
    if (linkIds.isEmpty()) return
    viewModelScope.launch {
        linkIds.forEach { linkId ->
            try {
                collectionDao.addLinkToCollection(
                    CollectionLinkCrossRef(collectionId = collectionId, linkId = linkId)
                )
            } catch (e: kotlinx.coroutines.CancellationException) {
                // Cancellation must propagate so the coroutine stops promptly.
                throw e
            } catch (e: Exception) {
                android.util.Log.w(
                    "DeadLinksViewModel",
                    "Could not add link $linkId to collection $collectionId",
                    e
                )
            }
        }
        tokenManager.setCollectionsConfigDirty(true)
        syncManager.syncNow()
        clearSelection()
    }
}
fun deleteLink(id: Int) {
viewModelScope.launch {
linkRepository.deleteLink(id)

View File

@ -285,6 +285,7 @@ fun FeedScreen(
onNavigateToReminders: () -> Unit = {},
onNavigateToTodo: () -> Unit = {},
onNavigateToTodoDetail: (Int) -> Unit = {},
onNavigateToBrowser: (String) -> Unit = {},
onPlayAudio: ((com.shaarit.domain.model.ShaarliLink) -> Unit)? = null,
initialTagFilter: String? = null,
initialCollectionId: Long? = null,
@ -1709,6 +1710,9 @@ fun FeedScreen(
reminderTargetLinkId = linkId
reminderTargetLinkTitle = link.title
showReminderSheet = true
},
onOpenInternalClick = { url ->
onNavigateToBrowser(url)
}
)
}

View File

@ -26,6 +26,7 @@ import androidx.compose.material.icons.filled.Alarm
import androidx.compose.material.icons.filled.PlayArrow
import androidx.compose.material.icons.filled.Settings
import androidx.compose.material.icons.filled.TaskAlt
import androidx.compose.material.icons.filled.OpenInBrowser
import androidx.compose.foundation.rememberScrollState
import androidx.compose.foundation.verticalScroll
import androidx.compose.ui.window.DialogProperties
@ -822,7 +823,8 @@ fun LinkDetailsView(
onDismiss: () -> Unit,
onLinkClick: (String) -> Unit,
onReadClick: ((Int) -> Unit)? = null,
onReminderClick: ((Int) -> Unit)? = null
onReminderClick: ((Int) -> Unit)? = null,
onOpenInternalClick: ((String) -> Unit)? = null
) {
Box(
modifier = Modifier
@ -911,8 +913,9 @@ fun LinkDetailsView(
Spacer(modifier = Modifier.height(12.dp))
// Reader Mode & Reminder actions
Row(
horizontalArrangement = Arrangement.spacedBy(8.dp)
FlowRow(
horizontalArrangement = Arrangement.spacedBy(8.dp),
verticalArrangement = Arrangement.spacedBy(8.dp)
) {
if (onReadClick != null && !link.url.startsWith("note://")) {
OutlinedButton(
@ -935,6 +938,18 @@ fun LinkDetailsView(
Text("Rappel", style = MaterialTheme.typography.labelMedium)
}
}
if (onOpenInternalClick != null && !link.url.startsWith("note://")) {
OutlinedButton(
onClick = {
onOpenInternalClick(link.url)
onDismiss()
}
) {
Icon(Icons.Default.OpenInBrowser, contentDescription = null, modifier = Modifier.size(18.dp))
Spacer(modifier = Modifier.width(6.dp))
Text("Ouvrir", style = MaterialTheme.typography.labelMedium)
}
}
}
Spacer(modifier = Modifier.height(16.dp))

View File

@ -52,6 +52,9 @@ sealed class Screen(val route: String) {
object TodoDetail : Screen("todoDetail/{linkId}") {
fun createRoute(linkId: Int): String = "todoDetail/$linkId"
}
/**
 * Route of the in-app browser screen. The target URL travels as the single
 * `encodedUrl` path argument.
 */
object Browser : Screen("browser/{encodedUrl}") {
    /**
     * Builds the concrete route for [encodedUrl]. The value is inserted as-is, so the
     * caller is expected to pass an already URL-encoded string.
     */
    fun createRoute(encodedUrl: String): String = "browser/$encodedUrl"
}
}
@Composable
@ -167,6 +170,10 @@ fun AppNavGraph(
onNavigateToReminders = { navController.navigate(Screen.Reminders.route) },
onNavigateToTodo = { navController.navigate(Screen.Todo.route) },
onNavigateToTodoDetail = { linkId -> navController.navigate(Screen.TodoDetail.createRoute(linkId)) },
onNavigateToBrowser = { url ->
val encoded = URLEncoder.encode(url, "UTF-8")
navController.navigate(Screen.Browser.createRoute(encoded))
},
onPlayAudio = onPlayAudio,
initialTagFilter = tag,
initialCollectionId = collectionId
@ -374,5 +381,20 @@ fun AppNavGraph(
onNavigateBack = { navController.popBackStack() }
)
}
composable(
route = Screen.Browser.route,
arguments = listOf(
navArgument("encodedUrl") {
type = NavType.StringType
}
)
) { backStackEntry ->
val encodedUrl = backStackEntry.arguments?.getString("encodedUrl") ?: ""
com.shaarit.presentation.browser.InternalBrowserScreen(
encodedUrl = encodedUrl,
onNavigateBack = { navController.popBackStack() }
)
}
}
}

BIN
eatingwell.html Normal file

Binary file not shown.

View File

@ -1,3 +1,3 @@
#Wed Apr 22 22:23:35 2026
VERSION_NAME=2.10.0
VERSION_CODE=35
#Thu Apr 23 15:59:20 2026
VERSION_NAME=2.12.0
VERSION_CODE=38