/*1* *****************************************************************************2*3* SPDX-License-Identifier: BSD-2-Clause4*5* Copyright (c) 2018-2025 Gavin D. Howard and contributors.6*7* Redistribution and use in source and binary forms, with or without8* modification, are permitted provided that the following conditions are met:9*10* * Redistributions of source code must retain the above copyright notice, this11* list of conditions and the following disclaimer.12*13* * Redistributions in binary form must reproduce the above copyright notice,14* this list of conditions and the following disclaimer in the documentation15* and/or other materials provided with the distribution.16*17* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"18* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE19* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE20* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE21* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR22* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF23* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS24* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN25* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)26* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE27* POSSIBILITY OF SUCH DAMAGE.28*29* *****************************************************************************30*31* Generates a const array from a bc script.32*33*/3435#include <assert.h>36#include <stdbool.h>37#include <stdio.h>38#include <stdlib.h>39#include <string.h>4041#include <errno.h>4243#include <fcntl.h>44#include <sys/stat.h>4546#ifndef _WIN3247#include <unistd.h>48#endif // _WIN324950// For some reason, Windows can't have this header.51#ifndef _WIN3252#include <libgen.h>53#endif // _WIN325455// This pulls in cross-platform stuff.56#include <status.h>5758// clang-format off5960// The usage help.61static const char* const bc_gen_usage =62"usage: %s input output exclude name [label [define [remove_tabs]]]\n";6364static const char* const bc_gen_ex_start = "{{ A H N HN }}";65static const char* const bc_gen_ex_end = "{{ end }}";6667// This is exactly what it looks like. It just slaps a simple license header on68// the generated C source file.69static const char* const bc_gen_header =70"// Copyright (c) 2018-2025 Gavin D. Howard and contributors.\n"71"// Licensed under the 2-clause BSD license.\n"72"// *** AUTOMATICALLY GENERATED FROM %s. DO NOT MODIFY. ***\n\n";73// clang-format on7475// These are just format strings used to generate the C source.76static const char* const bc_gen_label = "const char *%s = \"%s\";\n\n";77static const char* const bc_gen_label_extern = "extern const char *%s;\n\n";78static const char* const bc_gen_ifdef = "#if %s\n";79static const char* const bc_gen_endif = "#endif // %s\n";80static const char* const bc_gen_name = "const char %s[] = {\n";81static const char* const bc_gen_name_extern = "extern const char %s[];\n\n";8283// Error codes. We can't use 0 because these are used as exit statuses, and 084// as an exit status is not an error.85#define IO_ERR (1)86#define INVALID_INPUT_FILE (2)87#define INVALID_PARAMS (3)8889// This is the max width to print characters to the screen. This is to ensure90// that lines don't go much over 80 characters.91#define MAX_WIDTH (72)9293/**94* Open a file. This function is to smooth over differences between POSIX and95* Windows.96* @param f A pointer to the FILE pointer that will be initialized.97* @param filename The name of the file.98* @param mode The mode to open the file in.99*/100static void101open_file(FILE** f, const char* filename, const char* mode)102{103#ifndef _WIN32104105*f = fopen(filename, mode);106107#else // _WIN32108109// We want the file pointer to be NULL on failure, but fopen_s() is not110// guaranteed to set it.111*f = NULL;112fopen_s(f, filename, mode);113114#endif // _WIN32115}116117/**118* A portability file open function. This is copied from src/read.c. Make sure119* to update that if this changes.120* @param path The path to the file to open.121* @param mode The mode to open in.122*/123static int124bc_read_open(const char* path, int mode)125{126int fd;127128#ifndef _WIN32129fd = open(path, mode);130#else // _WIN32131fd = -1;132open(&fd, path, mode);133#endif134135return fd;136}137138/**139* Reads a file and returns the file as a string. This has been copied from140* src/read.c. Make sure to change that if this changes.141* @param path The path to the file.142* @return The contents of the file as a string.143*/144static char*145bc_read_file(const char* path)146{147int e = IO_ERR;148size_t size, to_read;149struct stat pstat;150int fd;151char* buf;152char* buf2;153154// This has been copied from src/read.c. Make sure to change that if this155// changes.156157assert(path != NULL);158159#if BC_DEBUG160// Need this to quiet MSan.161// NOLINTNEXTLINE162memset(&pstat, 0, sizeof(struct stat));163#endif // BC_DEBUG164165fd = bc_read_open(path, O_RDONLY);166167// If we can't read a file, we just barf.168if (BC_ERR(fd < 0))169{170fprintf(stderr, "Could not open file: %s\n", path);171exit(INVALID_INPUT_FILE);172}173174// The reason we call fstat is to eliminate TOCTOU race conditions. This175// way, we have an open file, so it's not going anywhere.176if (BC_ERR(fstat(fd, &pstat) == -1))177{178fprintf(stderr, "Could not stat file: %s\n", path);179exit(INVALID_INPUT_FILE);180}181182// Make sure it's not a directory.183if (BC_ERR(S_ISDIR(pstat.st_mode)))184{185fprintf(stderr, "Path is directory: %s\n", path);186exit(INVALID_INPUT_FILE);187}188189// Get the size of the file and allocate that much.190size = (size_t) pstat.st_size;191buf = (char*) malloc(size + 1);192if (buf == NULL)193{194fprintf(stderr, "Could not malloc\n");195exit(INVALID_INPUT_FILE);196}197buf2 = buf;198to_read = size;199200do201{202// Read the file. We just bail if a signal interrupts. This is so that203// users can interrupt the reading of big files if they want.204ssize_t r = read(fd, buf2, to_read);205if (BC_ERR(r < 0)) exit(e);206to_read -= (size_t) r;207buf2 += (size_t) r;208}209while (to_read);210211// Got to have a nul byte.212buf[size] = '\0';213214close(fd);215216return buf;217}218219/**220* Outputs a label, which is a string literal that the code can use as a name221* for the file that is being turned into a string. This is important for the222* math libraries because the parse and lex code expects a filename. The label223* becomes the filename for the purposes of lexing and parsing.224*225* The label is generated from bc_gen_label (above). It has the form:226*227* const char *<label_name> = <label>;228*229* This function is also needed to smooth out differences between POSIX and230* Windows, specifically, the fact that Windows uses backslashes for filenames231* and that backslashes have to be escaped in a string literal.232*233* @param out The file to output to.234* @param label The label name.235* @param name The actual label text, which is a filename.236* @return Positive if no error, negative on error, just like *printf().237*/238static int239output_label(FILE* out, const char* label, const char* name)240{241#ifndef _WIN32242243return fprintf(out, bc_gen_label, label, name);244245#else // _WIN32246247size_t i, count = 0, len = strlen(name);248char* buf;249int ret;250251// This loop counts how many backslashes there are in the label.252for (i = 0; i < len; ++i)253{254count += (name[i] == '\\');255}256257buf = (char*) malloc(len + 1 + count);258if (buf == NULL) return -1;259260count = 0;261262// This loop is the meat of the Windows version. What it does is copy the263// label byte-for-byte, unless it encounters a backslash, in which case, it264// copies the backslash twice to have it escaped properly in the string265// literal.266for (i = 0; i < len; ++i)267{268buf[i + count] = name[i];269270if (name[i] == '\\')271{272count += 1;273buf[i + count] = name[i];274}275}276277buf[i + count] = '\0';278279ret = fprintf(out, bc_gen_label, label, buf);280281free(buf);282283return ret;284285#endif // _WIN32286}287288/**289* This program generates C strings (well, actually, C char arrays) from text290* files. It generates 1 C source file. The resulting file has this structure:291*292* <Copyright Header>293*294* [<Label Extern>]295*296* <Char Array Extern>297*298* [<Preprocessor Guard Begin>]299* [<Label Definition>]300*301* <Char Array Definition>302* [<Preprocessor Guard End>]303*304* Anything surrounded by square brackets may not be in the final generated305* source file.306*307* The required command-line parameters are:308*309* input Input filename.310* output Output filename.311* exclude Whether to exclude extra math-only stuff.312* name The name of the char array.313*314* The optional parameters are:315*316* label If given, a label for the char array. See the comment for the317* output_label() function. It is meant as a "filename" for the318* text when processed by bc and dc. If label is given, then the319* <Label Extern> and <Label Definition> will exist in the320* generated source file.321* define If given, a preprocessor macro that should be used as a guard322* for the char array and its label. If define is given, then323* <Preprocessor Guard Begin> will exist in the form324* "#if <define>" as part of the generated source file, and325* <Preprocessor Guard End> will exist in the form326* "endif // <define>".327* remove_tabs If this parameter exists, it must be an integer. If it is328* non-zero, then tabs are removed from the input file text before329* outputting to the output char array.330*331* All text files that are transformed have license comments. This program finds332* the end of that comment and strips it out as well.333*/334int335main(int argc, char* argv[])336{337char* in;338FILE* out;339const char* label;340const char* define;341char* name;342unsigned int count, slashes, err = IO_ERR;343bool has_label, has_define, remove_tabs, exclude_extra_math;344size_t i;345346if (argc < 5)347{348printf(bc_gen_usage, argv[0]);349return INVALID_PARAMS;350}351352exclude_extra_math = (strtoul(argv[3], NULL, 10) != 0);353354name = argv[4];355356has_label = (argc > 5 && strcmp("", argv[5]) != 0);357label = has_label ? argv[5] : "";358359has_define = (argc > 6 && strcmp("", argv[6]) != 0);360define = has_define ? argv[6] : "";361362remove_tabs = (argc > 7 && atoi(argv[7]) != 0);363364in = bc_read_file(argv[1]);365if (in == NULL) return INVALID_INPUT_FILE;366367open_file(&out, argv[2], "w");368if (out == NULL) goto out_err;369370if (fprintf(out, bc_gen_header, argv[1]) < 0) goto err;371if (has_label && fprintf(out, bc_gen_label_extern, label) < 0) goto err;372if (fprintf(out, bc_gen_name_extern, name) < 0) goto err;373if (has_define && fprintf(out, bc_gen_ifdef, define) < 0) goto err;374if (has_label && output_label(out, label, argv[1]) < 0) goto err;375if (fprintf(out, bc_gen_name, name) < 0) goto err;376377i = count = slashes = 0;378379// This is where the end of the license comment is found.380while (slashes < 2 && in[i] > 0)381{382if (slashes == 1 && in[i] == '*' && in[i + 1] == '/' &&383(in[i + 2] == '\n' || in[i + 2] == '\r'))384{385slashes += 1;386i += 2;387}388else if (!slashes && in[i] == '/' && in[i + 1] == '*')389{390slashes += 1;391i += 1;392}393394i += 1;395}396397// The file is invalid if the end of the license comment could not be found.398if (in[i] == 0)399{400fprintf(stderr, "Could not find end of license comment\n");401err = INVALID_INPUT_FILE;402goto err;403}404405i += 1;406407// Do not put extra newlines at the beginning of the char array.408while (in[i] == '\n' || in[i] == '\r')409{410i += 1;411}412413// This loop is what generates the actual char array. It counts how many414// chars it has printed per line in order to insert newlines at appropriate415// places. It also skips tabs if they should be removed.416while (in[i] != 0)417{418int val;419420if (in[i] == '\r')421{422i += 1;423continue;424}425426if (!remove_tabs || in[i] != '\t')427{428// Check for excluding something for extra math.429if (in[i] == '{')430{431// If we found the start...432if (!strncmp(in + i, bc_gen_ex_start, strlen(bc_gen_ex_start)))433{434if (exclude_extra_math)435{436// Get past the braces.437i += 2;438439// Find the end of the end.440while (in[i] != '{' && strncmp(in + i, bc_gen_ex_end,441strlen(bc_gen_ex_end)))442{443i += 1;444}445446i += strlen(bc_gen_ex_end);447448// Skip the last newline.449if (in[i] == '\r') i += 1;450i += 1;451continue;452}453else454{455i += strlen(bc_gen_ex_start);456457// Skip the last newline.458if (in[i] == '\r') i += 1;459i += 1;460continue;461}462}463else if (!exclude_extra_math &&464!strncmp(in + i, bc_gen_ex_end, strlen(bc_gen_ex_end)))465{466i += strlen(bc_gen_ex_end);467468// Skip the last newline.469if (in[i] == '\r') i += 1;470i += 1;471continue;472}473}474475// Print a tab if we are at the beginning of a line.476if (!count && fputc('\t', out) == EOF) goto err;477478// Print the character.479val = fprintf(out, "%d,", in[i]);480if (val < 0) goto err;481482// Adjust the count.483count += (unsigned int) val;484if (count > MAX_WIDTH)485{486count = 0;487if (fputc('\n', out) == EOF) goto err;488}489}490491i += 1;492}493494// Make sure the end looks nice and insert the NUL byte at the end.495if (!count && (fputc(' ', out) == EOF || fputc(' ', out) == EOF)) goto err;496if (fprintf(out, "0\n};\n") < 0) goto err;497498err = (has_define && fprintf(out, bc_gen_endif, define) < 0);499500err:501fclose(out);502out_err:503free(in);504return (int) err;505}506507508