// SPDX-License-Identifier: GPL-2.01/*2* Copyright(c) 2014 Intel Mobile Communications GmbH3* Copyright(c) 2015 Intel Deutschland GmbH4*5* Author: Johannes Berg <[email protected]>6*/7#include <linux/module.h>8#include <linux/device.h>9#include <linux/devcoredump.h>10#include <linux/list.h>11#include <linux/slab.h>12#include <linux/fs.h>13#include <linux/workqueue.h>1415static struct class devcd_class;1617/* global disable flag, for security purposes */18static bool devcd_disabled;1920struct devcd_entry {21struct device devcd_dev;22void *data;23size_t datalen;24/*25* Here, mutex is required to serialize the calls to del_wk work between26* user/kernel space which happens when devcd is added with device_add()27* and that sends uevent to user space. User space reads the uevents,28* and calls to devcd_data_write() which try to modify the work which is29* not even initialized/queued from devcoredump.30*31*32*33* cpu0(X) cpu1(Y)34*35* dev_coredump() uevent sent to user space36* device_add() ======================> user space process Y reads the37* uevents writes to devcd fd38* which results into writes to39*40* devcd_data_write()41* mod_delayed_work()42* try_to_grab_pending()43* timer_delete()44* debug_assert_init()45* INIT_DELAYED_WORK()46* schedule_delayed_work()47*48*49* Also, mutex alone would not be enough to avoid scheduling of50* del_wk work after it get flush from a call to devcd_free()51* mentioned as below.52*53* disabled_store()54* devcd_free()55* mutex_lock() devcd_data_write()56* flush_delayed_work()57* mutex_unlock()58* mutex_lock()59* mod_delayed_work()60* mutex_unlock()61* So, delete_work flag is required.62*/63struct mutex mutex;64bool delete_work;65struct module *owner;66ssize_t (*read)(char *buffer, loff_t offset, size_t count,67void *data, size_t datalen);68void (*free)(void *data);69struct delayed_work del_wk;70struct device *failing_dev;71};7273static struct devcd_entry *dev_to_devcd(struct device *dev)74{75return container_of(dev, struct devcd_entry, devcd_dev);76}7778static void devcd_dev_release(struct device *dev)79{80struct devcd_entry *devcd = dev_to_devcd(dev);8182devcd->free(devcd->data);83module_put(devcd->owner);8485/*86* this seems racy, but I don't see a notifier or such on87* a struct device to know when it goes away?88*/89if (devcd->failing_dev->kobj.sd)90sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj,91"devcoredump");9293put_device(devcd->failing_dev);94kfree(devcd);95}9697static void devcd_del(struct work_struct *wk)98{99struct devcd_entry *devcd;100101devcd = container_of(wk, struct devcd_entry, del_wk.work);102103device_del(&devcd->devcd_dev);104put_device(&devcd->devcd_dev);105}106107static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,108const struct bin_attribute *bin_attr,109char *buffer, loff_t offset, size_t count)110{111struct device *dev = kobj_to_dev(kobj);112struct devcd_entry *devcd = dev_to_devcd(dev);113114return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);115}116117static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,118const struct bin_attribute *bin_attr,119char *buffer, loff_t offset, size_t count)120{121struct device *dev = kobj_to_dev(kobj);122struct devcd_entry *devcd = dev_to_devcd(dev);123124mutex_lock(&devcd->mutex);125if (!devcd->delete_work) {126devcd->delete_work = true;127mod_delayed_work(system_wq, &devcd->del_wk, 0);128}129mutex_unlock(&devcd->mutex);130131return count;132}133134static const struct bin_attribute devcd_attr_data =135__BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0);136137static const struct bin_attribute *const devcd_dev_bin_attrs[] = {138&devcd_attr_data, NULL,139};140141static const struct attribute_group devcd_dev_group = {142.bin_attrs = devcd_dev_bin_attrs,143};144145static const struct attribute_group *devcd_dev_groups[] = {146&devcd_dev_group, NULL,147};148149static int devcd_free(struct device *dev, void *data)150{151struct devcd_entry *devcd = dev_to_devcd(dev);152153mutex_lock(&devcd->mutex);154if (!devcd->delete_work)155devcd->delete_work = true;156157flush_delayed_work(&devcd->del_wk);158mutex_unlock(&devcd->mutex);159return 0;160}161162static ssize_t disabled_show(const struct class *class, const struct class_attribute *attr,163char *buf)164{165return sysfs_emit(buf, "%d\n", devcd_disabled);166}167168/*169*170* disabled_store() worker()171* class_for_each_device(&devcd_class,172* NULL, NULL, devcd_free)173* ...174* ...175* while ((dev = class_dev_iter_next(&iter))176* devcd_del()177* device_del()178* put_device() <- last reference179* error = fn(dev, data) devcd_dev_release()180* devcd_free(dev, data) kfree(devcd)181* mutex_lock(&devcd->mutex);182*183*184* In the above diagram, it looks like disabled_store() would be racing with parallelly185* running devcd_del() and result in memory abort while acquiring devcd->mutex which186* is called after kfree of devcd memory after dropping its last reference with187* put_device(). However, this will not happens as fn(dev, data) runs188* with its own reference to device via klist_node so it is not its last reference.189* so, above situation would not occur.190*/191192static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr,193const char *buf, size_t count)194{195long tmp = simple_strtol(buf, NULL, 10);196197/*198* This essentially makes the attribute write-once, since you can't199* go back to not having it disabled. This is intentional, it serves200* as a system lockdown feature.201*/202if (tmp != 1)203return -EINVAL;204205devcd_disabled = true;206207class_for_each_device(&devcd_class, NULL, NULL, devcd_free);208209return count;210}211static CLASS_ATTR_RW(disabled);212213static struct attribute *devcd_class_attrs[] = {214&class_attr_disabled.attr,215NULL,216};217ATTRIBUTE_GROUPS(devcd_class);218219static struct class devcd_class = {220.name = "devcoredump",221.dev_release = devcd_dev_release,222.dev_groups = devcd_dev_groups,223.class_groups = devcd_class_groups,224};225226static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,227void *data, size_t datalen)228{229return memory_read_from_buffer(buffer, count, &offset, data, datalen);230}231232static void devcd_freev(void *data)233{234vfree(data);235}236237/**238* dev_coredumpv - create device coredump with vmalloc data239* @dev: the struct device for the crashed device240* @data: vmalloc data containing the device coredump241* @datalen: length of the data242* @gfp: allocation flags243*244* This function takes ownership of the vmalloc'ed data and will free245* it when it is no longer used. See dev_coredumpm() for more information.246*/247void dev_coredumpv(struct device *dev, void *data, size_t datalen,248gfp_t gfp)249{250dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev);251}252EXPORT_SYMBOL_GPL(dev_coredumpv);253254static int devcd_match_failing(struct device *dev, const void *failing)255{256struct devcd_entry *devcd = dev_to_devcd(dev);257258return devcd->failing_dev == failing;259}260261/**262* devcd_free_sgtable - free all the memory of the given scatterlist table263* (i.e. both pages and scatterlist instances)264* NOTE: if two tables allocated with devcd_alloc_sgtable and then chained265* using the sg_chain function then that function should be called only once266* on the chained table267* @data: pointer to sg_table to free268*/269static void devcd_free_sgtable(void *data)270{271_devcd_free_sgtable(data);272}273274/**275* devcd_read_from_sgtable - copy data from sg_table to a given buffer276* and return the number of bytes read277* @buffer: the buffer to copy the data to it278* @buf_len: the length of the buffer279* @data: the scatterlist table to copy from280* @offset: start copy from @offset@ bytes from the head of the data281* in the given scatterlist282* @data_len: the length of the data in the sg_table283*284* Returns: the number of bytes copied285*/286static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,287size_t buf_len, void *data,288size_t data_len)289{290struct scatterlist *table = data;291292if (offset > data_len)293return -EINVAL;294295if (offset + buf_len > data_len)296buf_len = data_len - offset;297return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len,298offset);299}300301/**302* dev_coredump_put - remove device coredump303* @dev: the struct device for the crashed device304*305* dev_coredump_put() removes coredump, if exists, for a given device from306* the file system and free its associated data otherwise, does nothing.307*308* It is useful for modules that do not want to keep coredump309* available after its unload.310*/311void dev_coredump_put(struct device *dev)312{313struct device *existing;314315existing = class_find_device(&devcd_class, NULL, dev,316devcd_match_failing);317if (existing) {318devcd_free(existing, NULL);319put_device(existing);320}321}322EXPORT_SYMBOL_GPL(dev_coredump_put);323324/**325* dev_coredumpm_timeout - create device coredump with read/free methods with a326* custom timeout.327* @dev: the struct device for the crashed device328* @owner: the module that contains the read/free functions, use %THIS_MODULE329* @data: data cookie for the @read/@free functions330* @datalen: length of the data331* @gfp: allocation flags332* @read: function to read from the given buffer333* @free: function to free the given buffer334* @timeout: time in jiffies to remove coredump335*336* Creates a new device coredump for the given device. If a previous one hasn't337* been read yet, the new coredump is discarded. The data lifetime is determined338* by the device coredump framework and when it is no longer needed the @free339* function will be called to free the data.340*/341void dev_coredumpm_timeout(struct device *dev, struct module *owner,342void *data, size_t datalen, gfp_t gfp,343ssize_t (*read)(char *buffer, loff_t offset,344size_t count, void *data,345size_t datalen),346void (*free)(void *data),347unsigned long timeout)348{349static atomic_t devcd_count = ATOMIC_INIT(0);350struct devcd_entry *devcd;351struct device *existing;352353if (devcd_disabled)354goto free;355356existing = class_find_device(&devcd_class, NULL, dev,357devcd_match_failing);358if (existing) {359put_device(existing);360goto free;361}362363if (!try_module_get(owner))364goto free;365366devcd = kzalloc(sizeof(*devcd), gfp);367if (!devcd)368goto put_module;369370devcd->owner = owner;371devcd->data = data;372devcd->datalen = datalen;373devcd->read = read;374devcd->free = free;375devcd->failing_dev = get_device(dev);376devcd->delete_work = false;377378mutex_init(&devcd->mutex);379device_initialize(&devcd->devcd_dev);380381dev_set_name(&devcd->devcd_dev, "devcd%d",382atomic_inc_return(&devcd_count));383devcd->devcd_dev.class = &devcd_class;384385mutex_lock(&devcd->mutex);386dev_set_uevent_suppress(&devcd->devcd_dev, true);387if (device_add(&devcd->devcd_dev))388goto put_device;389390/*391* These should normally not fail, but there is no problem392* continuing without the links, so just warn instead of393* failing.394*/395if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,396"failing_device") ||397sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,398"devcoredump"))399dev_warn(dev, "devcoredump create_link failed\n");400401dev_set_uevent_suppress(&devcd->devcd_dev, false);402kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);403INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);404schedule_delayed_work(&devcd->del_wk, timeout);405mutex_unlock(&devcd->mutex);406return;407put_device:408put_device(&devcd->devcd_dev);409mutex_unlock(&devcd->mutex);410put_module:411module_put(owner);412free:413free(data);414}415EXPORT_SYMBOL_GPL(dev_coredumpm_timeout);416417/**418* dev_coredumpsg - create device coredump that uses scatterlist as data419* parameter420* @dev: the struct device for the crashed device421* @table: the dump data422* @datalen: length of the data423* @gfp: allocation flags424*425* Creates a new device coredump for the given device. If a previous one hasn't426* been read yet, the new coredump is discarded. The data lifetime is determined427* by the device coredump framework and when it is no longer needed428* it will free the data.429*/430void dev_coredumpsg(struct device *dev, struct scatterlist *table,431size_t datalen, gfp_t gfp)432{433dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable,434devcd_free_sgtable);435}436EXPORT_SYMBOL_GPL(dev_coredumpsg);437438static int __init devcoredump_init(void)439{440return class_register(&devcd_class);441}442__initcall(devcoredump_init);443444static void __exit devcoredump_exit(void)445{446class_for_each_device(&devcd_class, NULL, NULL, devcd_free);447class_unregister(&devcd_class);448}449__exitcall(devcoredump_exit);450451452