Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
signalapp
GitHub Repository: signalapp/Signal-iOS
Path: blob/main/SignalServiceKit/Backups/Archiving/Archivers/ChatItem/BackupOversizeTextCache.swift
1 views
//
// Copyright 2025 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//

public import GRDB

/// This table is used exclusively by backups to import/export inlined "oversize" text.
///
/// For Context: "oversize" text is when a message's body exceeds ``OWSMediaUtils.kOversizeTextMessageSizeThresholdBytes``;
/// the full text (including the first ``OWSMediaUtils.kOversizeTextMessageSizeThresholdBytes`` bytes) is represented as an Attachment
/// for purposes of message sending/receiving.
/// Backups have a separate, larger threshold (``BackupOversizeTextCache/maxTextLengthBytes``). All oversize
/// text attachments are truncated to this length and inlined in the backup proto (bytes past this length are simply dropped).
///
/// Because the _rest of the app_ represents oversize text as an attachment file on disk, but backups prefers not to do file i/o\*,
/// we instead write all inlined oversize text to this table to be used by import/export.
///
/// For export, we populate this table as part of backups, before opening the write tx. Population is incremental; we don't
/// wipe the table so we only need to populate any new oversize text attachments that got created since the last backup.
///
/// For import, we populate the table with inlined text from the backup, and block backup restore completion on then translating
/// all the inlined text into Attachment stream files after the backup write tx commits.
///
/// \* Two reasons to avoid file i/o
///   1. performance
///   2. during restore if we cancel/terminate the whole backup write transaction is rolled back but any file i/o we did
///     at the same time is not rolled back; we'd need a mechanism to clean up the files.
public struct BackupOversizeTextCache: Codable, FetchableRecord, MutablePersistableRecord {

    /// Every row in this table is limited to this many bytes (not characters) of text, in both
    /// the Swift model object and at the SQLite level.
    public static let maxTextLengthBytes = OWSMediaUtils.kMaxOversizeTextMessageReceiveSizeBytes

    /// Type of this table's row id.
    public typealias IDType = Int64

    /// Row id of this record. `nil` for a record that has not been inserted yet;
    /// assigned by `didInsert(with:for:)`.
    public private(set) var id: IDType?
    /// Row id of the attachment this text belongs to.
    public let attachmentRowId: Attachment.IDType
    /// The cached oversize text.
    public let text: String

    /// Memberwise initializer. `fileprivate`, so records can only be constructed from within this file.
    fileprivate init(id: IDType?, attachmentRowId: Attachment.IDType, text: String) {
        self.id = id
        self.attachmentRowId = attachmentRowId
        self.text = text
    }

    // MARK: FetchableRecord

    /// Name of the backing database table.
    public static var databaseTableName: String { "BackupOversizeTextCache" }

    // MARK: MutablePersistableRecord

    /// Insertion callback: stores the newly assigned row id on the record.
    public mutating func didInsert(with rowID: Int64, for column: String?) {
        self.id = rowID
    }

    // MARK: Codable

    /// Coding keys; also used in this file to build `Column` expressions for queries.
    public enum CodingKeys: String, CodingKey {
        case id
        case attachmentRowId
        case text
    }
}

extension BackupArchive {
    /// A message body as prepared for export by
    /// `BackupArchiveInlinedOversizeTextArchiver.archiveMessageBody`.
    struct ArchivedMessageBody {
        /// The text to inline in the backup.
        let inlinedText: String
        /// Pointer to the oversize text attachment, or `nil` when no pointer
        /// accompanies the inlined text.
        let oversizedTextPointer: BackupProto_FilePointer?
    }
}

class BackupArchiveInlinedOversizeTextArchiver {

    // Collaborators. All are injected through `init`, except `kvStore` and `logger`,
    // which `init` constructs itself.
    private let attachmentsArchiver: BackupArchiveMessageAttachmentArchiver
    private let attachmentContentValidator: AttachmentContentValidator
    private let attachmentManager: AttachmentManager
    private let attachmentStore: AttachmentStore
    private let db: DB
    private let kvStore: KeyValueStore
    private let logger: PrefixedLogger
    private let orphanedAttachmentStore: OrphanedAttachmentStore

    /// Key in `kvStore` under which a `BackupOversizeTextCache` row id is stored as a progress marker;
    /// `finishRestoringOversizedTextAttachments` only considers rows whose id is greater than it.
    private static let lastRestoredRowIdKey = "lastRestoredRowIdKey"

    // MARK: - Public API

    /// Creates an archiver from its injected collaborators.
    ///
    /// The logger (prefixed "[Backups]") and the key-value store (collection
    /// "BackupOversizeTextCacheStore") are built here rather than injected.
    init(
        attachmentsArchiver: BackupArchiveMessageAttachmentArchiver,
        attachmentContentValidator: AttachmentContentValidator,
        attachmentManager: AttachmentManager,
        attachmentStore: AttachmentStore,
        db: DB,
        orphanedAttachmentStore: OrphanedAttachmentStore,
    ) {
        // Members constructed locally.
        logger = PrefixedLogger(prefix: "[Backups]")
        kvStore = KeyValueStore(collection: "BackupOversizeTextCacheStore")

        // Members handed in by the caller.
        self.db = db
        self.attachmentStore = attachmentStore
        self.attachmentManager = attachmentManager
        self.attachmentsArchiver = attachmentsArchiver
        self.attachmentContentValidator = attachmentContentValidator
        self.orphanedAttachmentStore = orphanedAttachmentStore
    }

    // MARK: - Archive

    /// Populate the BackupOversizeTextCache table with any oversize text attachment streams that weren't
    /// already present. After calling this method, BackupOversizeTextCache can be read for backup export.
    ///
    /// Message processing (and sending) should be suspended while this runs, so that new attachments
    /// are not created in the meantime.
    ///
    /// - Parameter progress: Optional sink. When provided, a source labelled "BackupOversizeTextCache"
    ///   with one unit per candidate attachment id is added to it.
    /// - Throws: Any error from reading the candidate attachment ids or from writing a batch.
    func populateTableIncrementally(progress: OWSProgressSink?) async throws {
        // We can get away with fetching attachment ids in one read then processing in separate
        // writes because no new attachments should be created while backups is running.
        // Worst case, we miss an attachment and the oversized text ends up truncated
        // or as a pointer in the backup.
        let attachmentIds: [Attachment.IDType] = try db.read { tx in
            try self.attachmentRowIdsForTablePopulation(tx: tx)
        }

        let progressSource: OWSProgressSource?
        if let progress {
            progressSource = await progress.addSource(
                withLabel: "BackupOversizeTextCache",
                unitCount: UInt64(attachmentIds.count),
            )
        } else {
            progressSource = nil
        }

        if attachmentIds.isEmpty {
            return
        }

        // Offset of the next unprocessed id; advanced by one batch per invocation of the block below.
        var attachmentIdIndex = 0
        try await TimeGatedBatch.processAll(db: db) { tx in
            let batchIds = attachmentIds.dropFirst(attachmentIdIndex).prefix(Self.batchCount)
            attachmentIdIndex += Self.batchCount
            try self.populateTableIncrementallyBatch(
                attachmentIds: batchIds,
                progress: progressSource,
                tx: tx,
            )
            // Finish as soon as the last non-empty batch has been written. Running one more, empty
            // batch would do no useful work and would make the batch helper overwrite the
            // progress marker it just stored with 0.
            return attachmentIdIndex < attachmentIds.count ? .more : .done(())
        }
    }

    typealias ArchivedMessageBody = BackupArchive.ArchivedMessageBody

    /// Builds the body to export for a message.
    ///
    /// - Without an oversize text attachment, the message text is inlined, capped at
    ///   `BackupOversizeTextCache.maxTextLengthBytes`.
    /// - With an attachment whose text is present in the cache table, the cached text is inlined
    ///   and no pointer is emitted.
    /// - With an attachment that has no cached text, the message text is trimmed to the standard
    ///   body threshold and exported alongside a pointer to the attachment.
    ///
    /// - Returns: `.success` with the body, or `.completeFailure` if reading the cache table throws.
    func archiveMessageBody(
        text: String,
        oversizeTextReferencedAttachment: ReferencedAttachment?,
        messageId: BackupArchive.InteractionUniqueId,
        context: BackupArchive.ArchivingContext,
    ) -> BackupArchive.ArchiveInteractionResult<ArchivedMessageBody> {
        let maxInlinedBytes = BackupOversizeTextCache.maxTextLengthBytes

        // Historically, message bodies could end up longer than
        // OWSMediaUtils.kOversizeTextMessageSizeThresholdBytes. Such text is inlined,
        // but never beyond the oversize text limit.
        let cappedText: String
        if text.lengthOfBytes(using: .utf8) > maxInlinedBytes {
            cappedText = text.trimToUtf8ByteCount(maxInlinedBytes)
        } else {
            cappedText = text
        }

        // No attachment means there is no oversized text to look up.
        guard let referencedAttachment = oversizeTextReferencedAttachment else {
            return .success(ArchivedMessageBody(inlinedText: cappedText, oversizedTextPointer: nil))
        }

        let cachedOversizeText: String?
        do {
            cachedOversizeText = try fetchInlineableOversizedText(
                attachmentId: referencedAttachment.attachment.id,
                tx: context.tx,
            )
        } catch {
            return .completeFailure(.fatalArchiveError(.oversizedTextCacheFetchError(error)))
        }

        guard let oversizedText = cachedOversizeText else {
            // Nothing cached for this attachment, so the best we can do is export a pointer
            // together with a body trimmed to the standard threshold.
            let pointerProto = attachmentsArchiver.archiveOversizeTextAttachment(
                referencedAttachment: referencedAttachment,
                context: context,
            )
            let standardBody = cappedText.trimToUtf8ByteCount(
                OWSMediaUtils.kOversizeTextMessageSizeThresholdBytes
            )
            return .success(ArchivedMessageBody(
                inlinedText: standardBody,
                oversizedTextPointer: pointerProto,
            ))
        }

        // The full text is available: inline it. When inlining, a pointer must not be included
        // as well (doing so is disallowed).
        return .success(ArchivedMessageBody(inlinedText: oversizedText, oversizedTextPointer: nil))
    }

    // MARK: Restore

    typealias RestoredMessageBody = BackupArchive.RestoreInteractionResult<Void>.Type == Never.Type
        ? Never
        : BackupArchive.RestoredMessageContents.Text.RestoredMessageBody

    /// Restores a message's oversized text from a backup.
    ///
    /// A pointer is handed straight to the attachments archiver. Inlined text is staged in the
    /// cache table next to a placeholder attachment, to be turned into a real attachment stream
    /// later (after this tx commits) by `finishRestoringOversizedTextAttachments(progress:)`.
    func restoreOversizeText(
        _ oversizedText: RestoredMessageBody.OversizeText,
        messageRowId: Int64,
        message: TSMessage,
        thread: BackupArchive.ChatThread,
        chatItemId: BackupArchive.ChatItemId,
        context: BackupArchive.ChatItemRestoringContext,
    ) -> BackupArchive.RestoreInteractionResult<Void> {
        switch oversizedText {
        case .attachmentPointer(let attachmentPointer):
            return attachmentsArchiver.restoreOversizeTextAttachment(
                attachmentPointer,
                chatItemId: chatItemId,
                messageRowId: messageRowId,
                message: message,
                thread: thread,
                context: context,
            )
        case .inlined(let inlinedText):
            return restoreInlinedOversizeText(
                inlinedText,
                messageRowId: messageRowId,
                message: message,
                thread: thread,
                chatItemId: chatItemId,
                context: context,
            )
        }
    }

    /// Stages inlined oversize text: creates a placeholder attachment owned by the message and
    /// records the text in the cache table keyed by that attachment's row id.
    private func restoreInlinedOversizeText(
        _ text: String,
        messageRowId: Int64,
        message: TSMessage,
        thread: BackupArchive.ChatThread,
        chatItemId: BackupArchive.ChatItemId,
        context: BackupArchive.ChatItemRestoringContext,
    ) -> BackupArchive.RestoreInteractionResult<Void> {
        // An undownloadable FilePointer proto: it exists only so a placeholder,
        // undownloadable attachment can be created now and filled in with the text later.
        var placeholderProto = BackupProto_FilePointer()
        placeholderProto.locatorInfo = BackupProto_FilePointer.LocatorInfo()
        placeholderProto.contentType = MimeType.textXSignalPlain.rawValue

        let placeholderAttachment = OwnedAttachmentBackupPointerProto(
            proto: placeholderProto,
            renderingFlag: .default,
            clientUUID: nil,
            owner: .messageOversizeText(.init(
                messageRowId: messageRowId,
                receivedAtTimestamp: message.receivedAtTimestamp,
                threadRowId: thread.threadRowId,
                isPastEditRevision: message.isPastEditRevision(),
            )),
        )

        // Free or paid, this should have been set when the account data frame was restored.
        guard let uploadEra = context.uploadEra else {
            return .messageFailure([.restoreFrameError(.invalidProtoData(.accountDataNotFound), chatItemId)])
        }

        attachmentManager.createAttachmentPointer(
            from: placeholderAttachment,
            uploadEra: uploadEra,
            attachmentByteCounter: context.attachmentByteCounter,
            tx: context.tx,
        )

        // Look the freshly created reference back up to learn the attachment's row id.
        guard
            let reference = attachmentStore.fetchAnyReference(
                owner: .messageOversizeText(messageRowId: messageRowId),
                tx: context.tx,
            )
        else {
            return .messageFailure([.restoreFrameError(
                .failedToCreateAttachment,
                chatItemId,
            )])
        }

        do {
            try insert(attachmentId: reference.attachmentRowId, text: text, tx: context.tx)
            return .success(())
        } catch {
            return .messageFailure([.restoreFrameError(
                .failedToCreateAttachment,
                chatItemId,
            )])
        }
    }

    /// Processes the cache rows that have not been handled yet, batch by batch, until none remain.
    ///
    /// - Parameter progress: Optional sink. When provided, a source labelled "OversizedTexts" is
    ///   added, sized by the number of rows above the stored progress marker (at least 1).
    /// - Throws: Any error from counting the pending rows or from a batch.
    func finishRestoringOversizedTextAttachments(
        progress: OWSProgressSink?,
    ) async throws {
        var progressSource: OWSProgressSource?
        if let progress {
            let pendingRowCount = try db.read { tx in
                let lastRestoredId = kvStore.getInt64(Self.lastRestoredRowIdKey, defaultValue: 0, transaction: tx)
                let pendingRows = BackupOversizeTextCache
                    .filter(Column(BackupOversizeTextCache.CodingKeys.id) > lastRestoredId)
                return try pendingRows.fetchCount(tx.database)
            }
            progressSource = await progress.addSource(withLabel: "OversizedTexts", unitCount: UInt64(max(1, pendingRowCount)))
        }

        var isFinished = false
        repeat {
            isFinished = try await finishRestoringOversizedTextAttachmentBatch()

            // Advance progress by up to one batch per pass, never past the total.
            if let progressSource {
                let unitsLeft = progressSource.totalUnitCount - progressSource.completedUnitCount
                if unitsLeft > 0 {
                    let step = min(unitsLeft, UInt64(Self.batchCount))
                    progressSource.incrementCompletedUnitCount(by: step)
                }
            }
        } while !isFinished
    }

    // MARK: - Helpers

    /// Looks up the cached oversize text for an attachment.
    ///
    /// - Returns: The `text` of a cache row whose `attachmentRowId` equals `attachmentId`,
    ///   or `nil` when there is no such row.
    /// - Throws: Any error from the database fetch.
    private func fetchInlineableOversizedText(attachmentId: Attachment.IDType, tx: DBReadTransaction) throws -> String? {
        let attachmentRowIdColumn = Column(BackupOversizeTextCache.CodingKeys.attachmentRowId)
        let cachedRow = try BackupOversizeTextCache
            .filter(attachmentRowIdColumn == attachmentId)
            .fetchOne(tx.database)
        return cachedRow?.text
    }

    /// Inserts a cache row holding `text` for the given attachment.
    ///
    /// Text longer than `BackupOversizeTextCache.maxTextLengthBytes` (in UTF-8 bytes) is logged
    /// and truncated to that limit before insertion.
    ///
    /// - Returns: The row id of the inserted record.
    /// - Throws: Any error from the insert, or an `OWSAssertionError` if the record has no row id
    ///   after a successful insert.
    @discardableResult
    private func insert(attachmentId: Attachment.IDType, text: String, tx: DBWriteTransaction) throws -> BackupOversizeTextCache.IDType {
        let maxBytes = BackupOversizeTextCache.maxTextLengthBytes
        var text = text
        if text.lengthOfBytes(using: .utf8) > maxBytes {
            logger.error("Oversized backup text too long! Truncating...")
            text = text.trimToUtf8ByteCount(maxBytes)
        }
        var record = BackupOversizeTextCache(id: nil, attachmentRowId: attachmentId, text: text)
        try record.insert(tx.database)
        // The id is populated by the record's insertion callback. Surface a missing id as a
        // thrown error (which every caller already handles) instead of crashing on a force-unwrap.
        guard let recordId = record.id else {
            throw OWSAssertionError("Missing row id after inserting BackupOversizeTextCache record")
        }
        return recordId
    }

    // Work in batches of 50 so we can make (and commit) incremental progress.
    // Used as the batch size both when populating the table for export and when
    // finishing the restore of oversized text attachments.
    static let batchCount = 50

    /// Fetches the row ids of the attachments that still need a row in the cache table.
    ///
    /// An attachment qualifies when its content type is `file`, its mime type is
    /// `MimeType.textXSignalPlain`, it has a non-nil `localRelativeFilePath`, and no
    /// `BackupOversizeTextCache` row references it yet.
    ///
    /// - Throws: Any error from the database fetch.
    private func attachmentRowIdsForTablePopulation(tx: DBReadTransaction) throws -> [Attachment.IDType] {
        return try Attachment.Record
            .filter(Column(Attachment.Record.CodingKeys.contentType) == Attachment.ContentTypeRaw.file.rawValue)
            .filter(Column(Attachment.Record.CodingKeys.mimeType) == MimeType.textXSignalPlain.rawValue)
            .filter(Column(Attachment.Record.CodingKeys.localRelativeFilePath) != nil)
            // Only rows not already represented in the oversize text cache table
            .filter(
                !BackupOversizeTextCache
                    .select(Column(BackupOversizeTextCache.CodingKeys.attachmentRowId))
                    .filter(
                        // The attachment table's id column is spelled out with its table name so
                        // that it refers to the outer query's row inside this subquery.
                        SQL(stringLiteral: "\(Attachment.Record.databaseTableName).\(Attachment.Record.CodingKeys.sqliteId.rawValue)")
                            == Column(BackupOversizeTextCache.CodingKeys.attachmentRowId),
                    )
                    .exists(),
            )
            // Only the id column is needed by the caller.
            .select(Column(Attachment.Record.CodingKeys.sqliteId))
            .fetchAll(tx.database)
    }

    /// Inserts cache rows for one batch of attachment ids within the given write transaction.
    ///
    /// Ids that do not resolve to an attachment stream, and streams whose text fails to decrypt,
    /// are skipped. Every id in the batch counts as one completed progress unit, skipped or not.
    ///
    /// - Parameters:
    ///   - attachmentIds: The attachment row ids to process in this batch.
    ///   - progress: Optional progress source, incremented once per id.
    ///   - tx: The write transaction to insert in.
    /// - Throws: Any error from inserting a cache row.
    private func populateTableIncrementallyBatch(
        attachmentIds: ArraySlice<Attachment.IDType>,
        progress: OWSProgressSource?,
        tx: DBWriteTransaction,
    ) throws {
        // Highest row id inserted by this batch; stays nil if nothing was inserted.
        var maxRecordId: BackupOversizeTextCache.IDType?
        for attachmentId in attachmentIds {
            // The progress source is sized by the number of ids, so count every id we are done
            // with, including the ones skipped below.
            defer {
                if let progress {
                    progress.incrementCompletedUnitCount(by: 1)
                }
            }

            guard let stream = attachmentStore.fetch(id: attachmentId, tx: tx)?.asStream() else {
                continue
            }
            owsAssertDebug(stream.contentType.raw == .file)
            owsAssertDebug(stream.mimeType == MimeType.textXSignalPlain.rawValue)

            // If the attachment fails to decrypt, skip this record.
            if let text = try? stream.decryptedLongText() {
                let recordId = try self.insert(attachmentId: stream.id, text: text, tx: tx)
                maxRecordId = max(maxRecordId ?? recordId, recordId)
            } else {
                logger.error("Failed to decrypt long text! Skipping.")
            }
        }

        // Treat these rows as "restored" (since we already have a corresponding attachment stream).
        // We'll never do a restore after doing an archive, but its still best practice to set.
        // Only write when this batch actually inserted something: a batch with no inserts must
        // not reset the marker stored by an earlier batch back to 0.
        if let maxRecordId {
            kvStore.setInt64(maxRecordId, key: Self.lastRestoredRowIdKey, transaction: tx)
        }
    }

    /// Restores one batch of cache rows into attachment files.
    ///
    /// Reads up to `batchCount` rows above the stored progress marker, prepares an oversize text
    /// attachment for each, and updates the corresponding attachments in a single write. Whether
    /// the batch succeeds or fails, the marker is advanced to the highest row id that was fetched,
    /// so every call makes forward progress.
    ///
    /// - Returns: `true` if done (no more rows to restore); `false` if a batch was handled and the
    ///   caller should call again.
    /// - Throws: Errors from the initial reads. Errors while preparing or writing the batch are
    ///   not rethrown; the batch is skipped instead.
    private func finishRestoringOversizedTextAttachmentBatch() async throws -> Bool {
        let records = try db.read { tx in
            let minId = kvStore.getInt64(Self.lastRestoredRowIdKey, defaultValue: 0, transaction: tx)
            return try BackupOversizeTextCache
                .filter(Column(BackupOversizeTextCache.CodingKeys.id) > minId)
                .order(Column(BackupOversizeTextCache.CodingKeys.id).asc)
                .limit(Self.batchCount)
                .fetchAll(tx.database)
        }

        // The progress marker for this batch is the highest row id we fetched. It is derived from
        // the fetched rows, not from the validator's output: if the output were empty or missing
        // rows, a marker taken from it would fall back to 0 (or lag behind) and the same rows
        // would be fetched again on the next call.
        guard let batchMaxRecordId = records.lazy.compactMap(\.id).max() else {
            return true
        }

        var cacheIdToAttachmentId = [BackupOversizeTextCache.IDType: Attachment.IDType]()
        var attachmentIdToCacheIds = [Attachment.IDType: [BackupOversizeTextCache.IDType]]()
        var messageBodies = [BackupOversizeTextCache.IDType: MessageBody]()
        for record in records {
            guard let recordId = record.id else {
                owsFailDebug("Fetched record without a row id")
                continue
            }
            cacheIdToAttachmentId[recordId] = record.attachmentRowId
            attachmentIdToCacheIds[record.attachmentRowId, default: []].append(recordId)
            messageBodies[recordId] = MessageBody(text: record.text, ranges: .empty)
        }

        // Each text is prepared with the encryption key of the attachment it belongs to.
        var attachmentKeys = [BackupOversizeTextCache.IDType: AttachmentKey]()
        try db.read { tx in
            for attachment in attachmentStore.fetch(ids: Array(attachmentIdToCacheIds.keys), tx: tx) {
                for recordId in attachmentIdToCacheIds[attachment.id] ?? [] {
                    attachmentKeys[recordId] = try AttachmentKey(combinedKey: attachment.encryptionKey)
                }
            }
        }

        do {
            let pendingAttachments = try await attachmentContentValidator.prepareOversizeTextsIfNeeded(
                from: messageBodies,
                attachmentKeys: attachmentKeys,
            )

            try await db.awaitableWrite { tx in
                for (recordId, validatedMessageBody) in pendingAttachments {
                    guard let pendingAttachment = validatedMessageBody.oversizeText else {
                        owsFailDebug("Got oversize text thats fits a normal message?")
                        continue
                    }
                    guard let attachmentId = cacheIdToAttachmentId[recordId] else {
                        owsFailDebug("Missing attachment id")
                        continue
                    }
                    guard
                        orphanedAttachmentStore.orphanAttachmentExists(
                            with: pendingAttachment.orphanRecordId,
                            tx: tx,
                        )
                    else {
                        throw OWSAssertionError("Attachment file deleted before creation")
                    }

                    attachmentManager.updateAttachmentWithOversizeTextFromBackup(
                        attachmentId: attachmentId,
                        pendingAttachment: pendingAttachment,
                        tx: tx,
                    )
                }
                // Mark progress in the same write as the attachment updates.
                kvStore.setInt64(batchMaxRecordId, key: Self.lastRestoredRowIdKey, transaction: tx)
            }
        } catch let error {
            owsFailDebug("Unable to process batch \(error.grdbErrorForLogging)")
            // Skip this batch; the backup already committed and theres no going back,
            // so all we can do is drop this long text to avoid bricking the app entirely.
            await db.awaitableWrite { tx in
                kvStore.setInt64(batchMaxRecordId, key: Self.lastRestoredRowIdKey, transaction: tx)
            }
        }
        return false
    }
}