Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
signalapp
GitHub Repository: signalapp/Signal-iOS
Path: blob/main/Signal/OrphanData/OWSOrphanDataCleaner.swift
1 views
//
// Copyright 2022 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//

import Foundation
import GRDB
import SignalServiceKit
import SignalUI

/// The result of a search for orphaned data: database records and on-disk
/// paths that nothing else appears to reference any more.
///
/// NOTE: The order of these properties defines the argument order of the
/// synthesized memberwise initializer, so don't reorder them casually.
private struct OWSOrphanData {
    /// Unique ids of interactions whose thread id is empty or doesn't match
    /// any existing thread.
    let interactionIds: Set<String>
    /// Paths of on-disk files (profile avatars, group avatars, stickers) that
    /// aren't referenced by the database.
    let filePaths: Set<String>
    /// Unique ids of reactions whose message id doesn't match any interaction.
    let reactionIds: Set<String>
    /// Unique ids of mentions whose message id doesn't match any interaction.
    let mentionIds: Set<String>
    /// Paths of orphaned files *and directories* (currently populated from the
    /// voice message draft directory).
    let fileAndDirectoryPaths: Set<String>
    /// Whether `StickerManager` reported that it has orphaned data to clean up.
    let hasOrphanedPacksOrStickers: Bool
}

/// Finds data that is no longer referenced ("orphaned") and optionally removes
/// it.
///
/// Notes:
///
/// * For avatar and sticker files on disk, we only bother cleaning up files,
///   not directories.
/// * For voice message drafts, we clean up orphaned files and any orphaned
///   directories that are empty.
enum OWSOrphanDataCleaner {

    private static let databaseStorage = SSKEnvironment.shared.databaseStorageRef

    /// Searches for orphaned data and then either removes it or merely reports
    /// on it.
    ///
    /// - Parameter shouldRemoveOrphanedData: When `true`, orphaned data that is
    ///   old enough is deleted. When `false`, this is an audit: the same
    ///   search and logging happens, but nothing is deleted.
    /// - Throws: `CancellationError` if the current task is cancelled, or any
    ///   error thrown while searching for orphaned data.
    static func cleanUp(shouldRemoveOrphanedData: Bool) async throws {
        let mode = shouldRemoveOrphanedData ? "cleanup" : "audit"
        Logger.info("starting orphan data \(mode)")

        // Orphaned cleanup has one risk: It could accidentally delete data still
        // in use (e.g., a profile avatar that's been saved to disk but whose
        // OWSUserProfile hasn't yet been saved).
        //
        // To prevent accidental data deletion, we take the following measure:
        //
        // * We don't delete any data created more recently than a fixed interval
        // before we started cleaning orphaned data. This prevents any stray data
        // currently in use by the app from being accidentally cleaned up.
        let startTime = Date()
        let orphanedData = try await findOrphanedData()
        try await processOrphanedData(
            orphanedData,
            startTime: startTime,
            shouldRemoveOrphanedData: shouldRemoveOrphanedData,
        )
        Logger.info("completed orphaned data \(mode)")
    }

    // MARK: - Find

    /// This method finds (but does not delete) orphaned data.
    ///
    /// - Returns: The orphaned database record ids and on-disk paths.
    /// - Throws: `CancellationError` if the current task is cancelled, or an
    ///   error if the interactions table can't be enumerated.
    private static func findOrphanedData() async throws -> OWSOrphanData {
        Logger.info("searching for orphaned data")

        let legacyProfileAvatarsFilePaths = try filePaths(inDirectorySafe: OWSUserProfile.legacyProfileAvatarsDirPath)
        let sharedDataProfileAvatarFilePaths = try filePaths(inDirectorySafe: OWSUserProfile.sharedDataProfileAvatarsDirPath)
        let allGroupAvatarFilePaths = try filePaths(inDirectorySafe: TSGroupModel.avatarsDirectory.path)
        let allStickerFilePaths = try filePaths(inDirectorySafe: StickerManager.cacheDirUrl().path)

        let allOnDiskFilePaths: Set<String> = {
            var result: Set<String> = []
            result.formUnion(legacyProfileAvatarsFilePaths)
            result.formUnion(sharedDataProfileAvatarFilePaths)
            result.formUnion(allGroupAvatarFilePaths)
            result.formUnion(allStickerFilePaths)
            // TODO: Badges?

            // This should be redundant, but this will future-proof us against ever
            // accidentally removing the GRDB databases during orphan clean up.
            let grdbPrimaryDirectoryPath = GRDBDatabaseStorageAdapter.databaseDirUrl(directoryMode: .primary).path
            let grdbHotswapDirectoryPath = GRDBDatabaseStorageAdapter.databaseDirUrl(directoryMode: .hotswapLegacy).path
            let grdbTransferDirectoryPath: String?
            if GRDBDatabaseStorageAdapter.hasAssignedTransferDirectory, TSAccountManagerObjcBridge.isTransferInProgressWithMaybeTransaction {
                grdbTransferDirectoryPath = GRDBDatabaseStorageAdapter.databaseDirUrl(directoryMode: .transfer).path
            } else {
                grdbTransferDirectoryPath = nil
            }

            let databaseFilePaths: Set<String> = {
                var filePathsToSubtract: Set<String> = []
                for filePath in result {
                    if filePath.hasPrefix(grdbPrimaryDirectoryPath) {
                        Logger.info("Protecting database file: \(filePath)")
                        filePathsToSubtract.insert(filePath)
                    } else if filePath.hasPrefix(grdbHotswapDirectoryPath) {
                        Logger.info("Protecting database hotswap file: \(filePath)")
                        filePathsToSubtract.insert(filePath)
                    } else if let grdbTransferDirectoryPath, filePath.hasPrefix(grdbTransferDirectoryPath) {
                        Logger.info("Protecting database transfer file: \(filePath)")
                        filePathsToSubtract.insert(filePath)
                    }
                }
                return filePathsToSubtract
            }()
            result.subtract(databaseFilePaths)

            return result
        }()

        let profileAvatarFilePaths = databaseStorage.read { tx in
            return OWSProfileManager.allProfileAvatarFilePaths(transaction: tx)
        }

        try Task.checkCancellation()

        let groupAvatarFilePaths = databaseStorage.read { tx in
            return TSGroupModel.allGroupAvatarFilePaths(transaction: tx)
        }

        try Task.checkCancellation()

        let voiceMessageDraftOrphanedPaths = await findOrphanedVoiceMessageDraftPaths()

        try Task.checkCancellation()

        var orphanInteractionIds: Set<String> = []
        var orphanReactionIds: Set<String> = []
        var orphanMentionIds: Set<String> = []
        var activeStickerFilePaths: Set<String> = []
        var hasOrphanedPacksOrStickers = false
        try databaseStorage.read { transaction in
            let threadIds: Set<String> = Set(ThreadFinder().fetchUniqueIds(tx: transaction))

            // An interaction is orphaned if it doesn't belong to an existing
            // thread. We also collect every interaction id so that we can detect
            // reactions and mentions that point at missing interactions.
            var allInteractionIds: Set<String> = []
            do {
                let fetchCursor = try Row.fetchCursor(
                    transaction.database,
                    sql: "SELECT \(interactionColumn: .threadUniqueId), \(interactionColumn: .uniqueId) FROM \(InteractionRecord.databaseTableName)",
                )
                while let row = try fetchCursor.next() {
                    let threadUniqueId = row[0] as String
                    let uniqueId = row[1] as String
                    try Task.checkCancellation()
                    if threadUniqueId.isEmpty || !threadIds.contains(threadUniqueId) {
                        orphanInteractionIds.insert(uniqueId)
                    }
                    allInteractionIds.insert(uniqueId)
                }
            } catch let error as CancellationError {
                // Cancellation isn't a database failure; propagate it untouched.
                throw error
            } catch {
                owsFailDebug("Couldn't enumerate TSInteractions: \(error.grdbErrorForLogging)")
                throw error.grdbErrorForLogging
            }

            OWSReaction.anyEnumerate(transaction: transaction, batchingPreference: .batched()) { reaction, stop in
                if Task.isCancelled {
                    stop = true
                    return
                }
                if !allInteractionIds.contains(reaction.uniqueMessageId) {
                    orphanReactionIds.insert(reaction.uniqueId)
                }
            }
            // The enumeration block can't throw, so surface cancellation here.
            try Task.checkCancellation()

            TSMention.anyEnumerate(transaction: transaction, batchingPreference: .batched()) { mention, stop in
                if Task.isCancelled {
                    stop = true
                    return
                }
                if !allInteractionIds.contains(mention.uniqueMessageId) {
                    orphanMentionIds.insert(mention.uniqueId)
                }
            }
            // The enumeration block can't throw, so surface cancellation here.
            try Task.checkCancellation()

            activeStickerFilePaths.formUnion(StickerManager.filePathsForAllInstalledStickers(transaction: transaction))
            try Task.checkCancellation()

            hasOrphanedPacksOrStickers = StickerManager.hasOrphanedData(tx: transaction)
        }

        // Anything on disk that the database doesn't reference is an orphan.
        var orphanFilePaths = allOnDiskFilePaths
        orphanFilePaths.subtract(profileAvatarFilePaths)
        orphanFilePaths.subtract(groupAvatarFilePaths)
        orphanFilePaths.subtract(activeStickerFilePaths)

        var orphanFileAndDirectoryPaths: Set<String> = []
        orphanFileAndDirectoryPaths.formUnion(voiceMessageDraftOrphanedPaths)

        return OWSOrphanData(
            interactionIds: orphanInteractionIds,
            filePaths: orphanFilePaths,
            reactionIds: orphanReactionIds,
            mentionIds: orphanMentionIds,
            fileAndDirectoryPaths: orphanFileAndDirectoryPaths,
            hasOrphanedPacksOrStickers: hasOrphanedPacksOrStickers,
        )
    }

    /// Finds paths in `baseUrl` not present in `fetchExpectedRelativePaths()`.
    ///
    /// - Parameters:
    ///   - baseUrl: The directory to search (recursively).
    ///   - fetchExpectedRelativePaths: Returns the paths, relative to
    ///     `baseUrl`, that the database says should exist.
    /// - Returns: The full paths of files and directories found under
    ///   `baseUrl` that aren't expected. Empty if the directory can't be read.
    private static func findOrphanedPaths(
        baseUrl: URL,
        fetchExpectedRelativePaths: (DBReadTransaction) -> Set<String>,
    ) async -> Set<String> {
        let basePath = baseUrl.path

        // The ordering within this method is important. First, we search the file
        // system for files that already exist. Next, we ensure that any pending
        // database write operations have finished. This ensures that any files
        // written as part of a database transaction are visible to our read
        // transaction. If we skip the write transaction, we may treat just-created
        // files as orphaned and remove them. If a new write transaction is opened
        // after the one in this method, we won't treat any files it creates as
        // orphaned since we've already finished searching the file system.
        // Finally, we consult the database to see which files should exist.

        let actualRelativePaths: [String]
        do {
            actualRelativePaths = try FileManager.default.subpathsOfDirectory(atPath: basePath)
        } catch CocoaError.fileReadNoSuchFile {
            // The directory doesn't exist, so there's nothing to clean up.
            actualRelativePaths = []
        } catch {
            Logger.warn("Orphan data cleaner couldn't find any paths \(error.shortDescription)")
            actualRelativePaths = []
        }

        if actualRelativePaths.isEmpty {
            return []
        }

        await databaseStorage.awaitableWrite { _ in }
        var expectedRelativePaths = databaseStorage.read { fetchExpectedRelativePaths($0) }

        // Mark the directories that contain these files as expected as well. This
        // avoids redundant `rmdir` calls to check if the directories are empty.
        // Each pass adds one more level of parent directories; we stop once a
        // pass doesn't add anything new.
        while true {
            let oldCount = expectedRelativePaths.count
            expectedRelativePaths.formUnion(expectedRelativePaths.lazy.map {
                ($0 as NSString).deletingLastPathComponent
            })
            let newCount = expectedRelativePaths.count
            if oldCount == newCount {
                break
            }
        }

        let orphanedRelativePaths = Set(actualRelativePaths).subtracting(expectedRelativePaths)
        return Set(orphanedRelativePaths.lazy.map { basePath.appendingPathComponent($0) })
    }

    /// Finds paths in the voice message draft directory that don't correspond
    /// to any draft known to the database.
    private static func findOrphanedVoiceMessageDraftPaths() async -> Set<String> {
        await findOrphanedPaths(
            baseUrl: VoiceMessageInterruptedDraftStore.draftVoiceMessageDirectory,
            fetchExpectedRelativePaths: {
                VoiceMessageInterruptedDraftStore.allDraftFilePaths(transaction: $0)
            },
        )
    }

    // MARK: - Remove

    /// Processes orphaned data: deletes it when `shouldRemoveOrphanedData` is
    /// `true`, otherwise only logs what would be deleted.
    ///
    /// - Parameters:
    ///   - orphanedData: The orphaned records and paths to process.
    ///   - startTime: When the search for orphaned data started. Interactions
    ///     and files newer than a fixed interval before this are skipped.
    ///   - shouldRemoveOrphanedData: Whether to delete (`true`) or only audit
    ///     (`false`).
    /// - Throws: `CancellationError` if the current task is cancelled.
    private static func processOrphanedData(
        _ orphanedData: OWSOrphanData,
        startTime: Date,
        shouldRemoveOrphanedData: Bool,
    ) async throws {
        // We need to avoid cleaning up new files that are still in the process of
        // being created/written, so we don't clean up anything recent.
        let minimumOrphanAgeSeconds: TimeInterval = 15 * .minute
        let thresholdDate = startTime.addingTimeInterval(-minimumOrphanAgeSeconds)

        // In audit mode nothing is deleted, so don't claim otherwise in the
        // summary log lines.
        let summaryVerb = shouldRemoveOrphanedData ? "Deleted" : "Would delete"

        var interactionsRemoved = 0
        for interactionId in orphanedData.interactionIds {
            try Task.checkCancellation()
            await databaseStorage.awaitableWrite { transaction in
                guard let interaction = TSInteraction.fetchViaCache(uniqueId: interactionId, transaction: transaction) else {
                    // This could just be a race condition, but it should be very unlikely.
                    Logger.warn("Could not load interaction: \(interactionId)")
                    return
                }
                // Don't delete interactions which were created in the last N minutes.
                let creationDate = Date(millisecondsSince1970: interaction.timestamp)
                guard creationDate <= thresholdDate else {
                    Logger.info("Skipping orphan interaction due to age: \(-creationDate.timeIntervalSinceNow)")
                    return
                }
                Logger.info("Removing orphan message: \(interaction.uniqueId)")
                interactionsRemoved += 1
                guard shouldRemoveOrphanedData else {
                    return
                }
                DependenciesBridge.shared.interactionDeleteManager
                    .delete(interaction, sideEffects: .default(), tx: transaction)
            }
        }
        Logger.info("\(summaryVerb) orphan interactions: \(interactionsRemoved)")

        var reactionsRemoved = 0
        for reactionId in orphanedData.reactionIds {
            try Task.checkCancellation()
            await databaseStorage.awaitableWrite { tx in
                let performedCleanup = ReactionManager.tryToCleanupOrphanedReaction(
                    uniqueId: reactionId,
                    thresholdDate: thresholdDate,
                    shouldPerformRemove: shouldRemoveOrphanedData,
                    transaction: tx,
                )
                if performedCleanup {
                    reactionsRemoved += 1
                }
            }
        }
        Logger.info("\(summaryVerb) orphan reactions: \(reactionsRemoved)")

        var mentionsRemoved = 0
        for mentionId in orphanedData.mentionIds {
            try Task.checkCancellation()
            await databaseStorage.awaitableWrite { tx in
                let performedCleanup = MentionFinder.tryToCleanupOrphanedMention(
                    uniqueId: mentionId,
                    thresholdDate: thresholdDate,
                    shouldPerformRemove: shouldRemoveOrphanedData,
                    transaction: tx,
                )
                if performedCleanup {
                    mentionsRemoved += 1
                }
            }
        }
        Logger.info("\(summaryVerb) orphan mentions: \(mentionsRemoved)")

        if orphanedData.hasOrphanedPacksOrStickers {
            // Like every other removal in this method, sticker cleanup must
            // respect audit mode and leave the database untouched.
            if shouldRemoveOrphanedData {
                await databaseStorage.awaitableWrite { transaction in
                    StickerManager.cleanUpOrphanedData(tx: transaction)
                }
            } else {
                Logger.info("Would clean up orphaned sticker packs/stickers")
            }
        }
        try Task.checkCancellation()

        var filesRemoved = 0
        let filePaths = orphanedData.filePaths.sorted()
        for filePath in filePaths {
            try Task.checkCancellation()

            guard let attributes = try? FileManager.default.attributesOfItem(atPath: filePath) else {
                // This is fine; the file may have been deleted since we found it.
                Logger.warn("Could not get attributes of file at: \(filePath)")
                continue
            }
            // Don't delete files which were modified in the last N minutes.
            if let modificationDate = (attributes as NSDictionary).fileModificationDate(), modificationDate > thresholdDate {
                Logger.info("Skipping file due to age: \(-modificationDate.timeIntervalSinceNow)")
                continue
            }
            Logger.info("Deleting file: \(filePath)")
            filesRemoved += 1
            guard shouldRemoveOrphanedData else {
                continue
            }
            if !OWSFileSystem.deleteFile(filePath, ignoreIfMissing: true) {
                owsFailDebug("Could not remove orphan file")
            }
        }
        Logger.info("\(summaryVerb) orphaned files: \(filesRemoved)")

        if shouldRemoveOrphanedData {
            try removeOrphanedFileAndDirectoryPaths(orphanedData.fileAndDirectoryPaths)
        } else {
            Logger.info("Would delete orphaned files/directories: \(orphanedData.fileAndDirectoryPaths.count)")
        }
    }

    /// Removes each of the given files and (empty) directories.
    ///
    /// Failures to remove an individual path are logged and counted but don't
    /// stop the remaining paths from being processed.
    ///
    /// - Throws: `CancellationError` if the current task is cancelled.
    private static func removeOrphanedFileAndDirectoryPaths(_ fileAndDirectoryPaths: Set<String>) throws {
        var successCount = 0
        var errorCount = 0
        // Sort by longest path to shortest path so that we remove files before we
        // try to remove the directories that contain them.
        let longestPathsFirst = fileAndDirectoryPaths.sorted(by: { $0.count > $1.count })
        for fileOrDirectoryPath in longestPathsFirst {
            try Task.checkCancellation()
            do {
                try removeFileOrEmptyDirectory(at: fileOrDirectoryPath)
                successCount += 1
            } catch {
                owsFailDebug("Couldn't remove file or directory: \(error.shortDescription)")
                errorCount += 1
            }
        }
        Logger.info("Deleted orphaned files/directories [successes: \(successCount), failures: \(errorCount)]")
    }

    /// Removes `path` if it's a file or an empty directory.
    ///
    /// Paths that don't exist and directories that aren't empty are silently
    /// left alone.
    ///
    /// - Throws: The error from `unlink` if the path can't be removed for any
    ///   reason other than it (or its containing directory) not existing.
    private static func removeFileOrEmptyDirectory(at path: String) throws {
        do {
            // First, remove it if it's a directory.
            try runUnixOperation(rmdir, argument: path)
            // The directory is gone, so there's nothing left to unlink.
            return
        } catch POSIXError.ENOENT {
            // It doesn't exist (or a parent directory doesn't exist).
            return
        } catch POSIXError.ENOTEMPTY {
            // It's not empty, so don't delete it.
            return
        } catch POSIXError.ENOTDIR {
            // It's a file.
        } catch {
            Logger.warn("Couldn't remove directory \(error.shortDescription)")
            // Fall through since it seems like this isn't a directory...
        }

        do {
            try runUnixOperation(unlink, argument: path)
        } catch POSIXError.ENOTDIR, POSIXError.ENOENT {
            // The file (or its containing directory) doesn't exist.
            return
        }
        // Any other error propagates to the caller.
    }

    /// Calls a C function that takes a path and reports failure via a nonzero
    /// return value and `errno` (e.g., `rmdir` or `unlink`).
    ///
    /// - Parameters:
    ///   - op: The C function to call.
    ///   - path: The path to pass to `op`.
    /// - Throws: A `POSIXError` built from `errno` if `op` fails, or an
    ///   `OWSGenericError` if `errno` isn't a known POSIX error code.
    private static func runUnixOperation(_ op: (UnsafePointer<CChar>?) -> Int32, argument path: String) throws {
        let (result, errorNumber): (Int32, Int32) = path.withCString { cPath in
            let result = op(cPath)
            // Capture `errno` immediately after the call. Any work that happens
            // after this closure returns could overwrite it.
            return (result, errno)
        }
        if result == 0 {
            return
        }
        if let errorCode = POSIXErrorCode(rawValue: errorNumber) {
            throw POSIXError(errorCode)
        }
        throw OWSGenericError("Operation failed.")
    }

    // MARK: - Helpers

    /// Recursively collects the paths of all files (not directories) inside
    /// `dirPath`.
    ///
    /// - Returns: The file paths, or an empty set if the directory doesn't
    ///   exist or its contents can't be listed.
    /// - Throws: `CancellationError` if the current task is cancelled.
    private static func filePaths(inDirectorySafe dirPath: String) throws -> Set<String> {
        guard FileManager.default.fileExists(atPath: dirPath) else {
            return []
        }
        var result: Set<String> = []
        let fileNames: [String]
        do {
            fileNames = try FileManager.default.contentsOfDirectory(atPath: dirPath)
        } catch {
            switch error {
            case POSIXError.ENOENT, CocoaError.fileReadNoSuchFile:
                // Races may cause files to be removed while we crawl the directory contents.
                Logger.warn("Error: \(error)")
            default:
                owsFailDebug("Error: \(error)")
            }
            return []
        }
        for fileName in fileNames {
            try Task.checkCancellation()
            let filePath = dirPath.appendingPathComponent(fileName)
            var isDirectory: ObjCBool = false
            // Entries that have disappeared since we listed the directory are
            // skipped.
            if FileManager.default.fileExists(atPath: filePath, isDirectory: &isDirectory) {
                if isDirectory.boolValue {
                    result.formUnion(try filePaths(inDirectorySafe: filePath))
                } else {
                    result.insert(filePath)
                }
            }
        }
        return result
    }
}