Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -91,40 +91,35 @@ class SummarizeService(feedRepository: FeedRepository, client: Client[IO], apiKe
Stream
.eval(feedRepository.getTotalUnreadCount(user.id))
.flatMap { totalUnread =>
Stream.eval(feedRepository.getUnreadFeeds(user, batchSize, offset)).flatMap {
feeds =>
val remainingAfterThis = totalUnread - offset - feeds.size
val metadata = SummaryEvent.Metadata(
feedsProcessed = feeds.size,
totalRemaining = Math.max(0, remainingAfterThis),
hasMore = remainingAfterThis > 0
)

Stream.emit(metadata) ++ (
if feeds.isEmpty && offset == 0 then
Stream
.eval(generateFunFact(user, selectedModel.modelId))
.map(SummaryEvent.FunFact(_)) ++ Stream.emit(SummaryEvent.Done)
else if feeds.isEmpty then Stream.emit(SummaryEvent.Done)
else
val text = feeds.map(_.description).mkString("\n")
val strippedText = Jsoup.parse(text).text()
val validatedLanguage = user.settings.summaryLanguage
.flatMap(SummaryLanguage.fromString)
.getOrElse(SummaryLanguage.English)

Stream
.eval(
if user.settings.isAiMode then
feedRepository.markFeedAsRead(feeds.map(_.link), user)
else IO.unit
)
.drain ++ summarizeStream(
strippedText,
validatedLanguage.displayName,
selectedModel.modelId
)
)
Stream.eval(feedRepository.getUnreadFeeds(user, batchSize, 0)).flatMap { feeds =>
val remainingAfterThis = totalUnread - feeds.size
val metadata = SummaryEvent.Metadata(
feedsProcessed = feeds.size,
totalRemaining = Math.max(0, remainingAfterThis),
hasMore = remainingAfterThis > 0
)

Stream.emit(metadata) ++ (
if feeds.isEmpty && offset == 0 then
Stream
.eval(generateFunFact(user, selectedModel.modelId))
.map(SummaryEvent.FunFact(_)) ++ Stream.emit(SummaryEvent.Done)
else if feeds.isEmpty then Stream.emit(SummaryEvent.Done)
else
val text = feeds.map(_.description).mkString("\n")
val strippedText = Jsoup.parse(text).text()
val validatedLanguage = user.settings.summaryLanguage
.flatMap(SummaryLanguage.fromString)
.getOrElse(SummaryLanguage.English)

Stream
.eval(feedRepository.markFeedAsRead(feeds.map(_.link), user))
.drain ++ summarizeStream(
strippedText,
validatedLanguage.displayName,
selectedModel.modelId
)
Comment on lines +117 to +121

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Severity: security-high (high)

The summarizeStream function at these lines is vulnerable to prompt injection, which can lead to stored Cross-Site Scripting (XSS) as the LLM-generated HTML output is not sanitized. It is crucial to sanitize this output using a library like Jsoup with a strict allow-list. Additionally, the change in this area unconditionally applies 'AI mode' behavior, causing a regression for non-AI users by breaking pagination and incorrectly marking feeds as read. The conditional logic based on user.settings.isAiMode must be restored.

                .drain ++ summarizeStream(
                    strippedText,
                    validatedLanguage.displayName,
                    selectedModel.modelId
                ).map {
                    case ru.trett.rss.models.SummaryEvent.Content(text) =>
                        ru.trett.rss.models.SummaryEvent.Content(
                            org.jsoup.Jsoup.clean(text, org.jsoup.safety.Safelist.relaxed())
                        )
                    case event => event
                }

)
}
}
.handleErrorWith { error =>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package ru.trett.rss.server.services

import cats.effect.IO
import cats.effect.unsafe.implicits.global
import org.http4s.client.Client
import org.scalamock.scalatest.MockFactory
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import org.typelevel.log4cats.LoggerFactory
import org.typelevel.log4cats.noop.NoOpFactory
import ru.trett.rss.server.models.User
import ru.trett.rss.server.repositories.FeedRepository

class SummarizeServiceSpec extends AnyFunSuite with Matchers with MockFactory {

test("streamSummary should always fetch feeds from offset 0 regardless of provided offset") {
    // Collaborators are mocked; only the repository interaction is verified.
    val repo = mock[FeedRepository]
    val httpClient = mock[Client[IO]]
    val currentUser = User("user-id", "User", "user@example.com", User.Settings())

    implicit val loggerFactory: LoggerFactory[IO] = NoOpFactory[IO]

    // The user has 60 unread feeds in total.
    (repo.getTotalUnreadCount)
        .expects("user-id")
        .returning(IO.pure(60))

    // Core expectation: even though the caller passes offset 30 (simulating a
    // "Load More" request), the service must query from offset 0 — in AI mode
    // the previous batch was already marked as read, so the unread window
    // always starts at the beginning.
    (repo
        .getUnreadFeeds(_: User, _: Int, _: Int))
        .expects(currentUser, 30, 0) // batchSize 30, offset forced to 0
        .returning(IO.pure(List.empty))

    val underTest = new SummarizeService(repo, httpClient, "api-key")

    // Drain the stream so every mock expectation is actually exercised.
    underTest.streamSummary(currentUser, 30).compile.toList.unsafeRunSync()
}
Comment on lines +16 to +40

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This is a good test for the 'AI mode' behavior. To improve test coverage and prevent regressions like the one I mentioned in SummarizeService.scala, it would be beneficial to add another test case for when AI mode is disabled (isAiMode is false).

This test would verify that:

  • The offset parameter passed to streamSummary is correctly passed through to feedRepository.getUnreadFeeds.
  • feedRepository.markFeedAsRead is not called.

This would ensure that the non-AI mode functionality remains correct and prevent future regressions.

}