Path: blob/master/src/packages/frontend/editors/slate/elements/code-block/info-to-mode.ts
1698 views
import { file_associations } from "@cocalc/frontend/file-associations";
import detectLanguage from "@cocalc/frontend/misc/detect-language";

/**
 * Convert the info string of a fenced code block to a codemirror mode.
 * When `preferKernel` is true, return the actual kernel name or language
 * instead, whenever one can be determined.
 *
 * @param info - the text after the opening fence, e.g. `py`, `{r, engine='python'}`,
 *   `{ .html .foo }` or `py {kernel='sage-9.8'}`.  May be empty/undefined/null.
 * @param options - `value` is the code in the block; it is passed to
 *   `detectLanguage` only when `info` is empty.  `preferKernel` switches the
 *   result from a codemirror mode to a kernel name / language.
 * @returns the mode (or kernel/language), or "" when there is neither info nor value.
 */
export default function infoToMode(
  info: string | undefined | null,
  options: { value?: string; preferKernel?: boolean } = {},
): string {
  const { value, preferKernel } = options;
  info = info?.trim().toLowerCase();
  if (!info) {
    if (!value) return ""; // no info
    info = detectLanguage(value);
  }

  if (info == "mermaid") {
    return "md";
  }

  // Format that seems to work well with github (unlike python-markdown and rmarkdown!), and we
  // use internally, e.g.,
  //     py {kernel='sage-9.8'}   or   py {kernel="sage-9.8"}
  // so we have extra info in braces.  Github just looks at the "python" part.
  if (preferKernel) {
    // extract the string that is after kernel as in the examples above, e.g., sage-9.8
    const kernelMatch = /kernel\s*=\s*[\'\"](.*?)[\'\"]/i.exec(info);
    if (kernelMatch) {
      return kernelMatch[1];
    }
  }

  // Rmarkdown format -- looks like {r stuff,engine=python,stuff}.
  //   https://github.com/yihui/knitr-examples/blob/master/023-engine-python.Rmd
  //     ```{r test-python, engine='python'}
  //     ```{python}
  // strip leading { and trailing }
  // Also "python-markdown" uses these braces, though differently.
  //   https://python-markdown.github.io/extensions/fenced_code_blocks
  //     ``` { .html .foo .bar }
  if (info[0] == "{") {
    info = info.slice(1);
    // Only drop the last character if it really is the closing brace; otherwise
    // an unterminated "{python" would be truncated to "pytho".
    if (info.endsWith("}")) {
      info = info.slice(0, -1);
    }
    info = info.trim();
    if (preferKernel) {
      // Reached when the quoted-kernel regex above did not match,
      // e.g., an unquoted value such as kernel=sage-9.8
      const i = info.indexOf("kernel=");
      if (i != -1) {
        return stripQuotes(firstWord(info.slice(i + "kernel=".length)));
      }
    }
  }
  info = info.toLowerCase().trim(); // our file_associations data all assumes lower case.

  // The mode specifier is then the first word before any blank
  let mode = firstWord(info);
  // mode can have a leading dot which we ignore, e.g., see
  //   https://python-markdown.github.io/extensions/fenced_code_blocks/
  if (mode[0] == ".") {
    mode = mode.slice(1);
  }

  if (mode == "r") {
    // If the mode is R then they optionally use an 'engine=' option to specify a
    // different mode entirely (in rmd), e.g., {r test-python, engine='python'}
    const i = info.indexOf("engine=");
    if (i != -1) {
      mode = stripQuotes(firstWord(info.slice(i + "engine=".length)));
    }
  }

  if (
    preferKernel &&
    (mode.startsWith("sage") ||
      mode.startsWith("octave") ||
      mode == "m" ||
      mode.startsWith("julia") ||
      mode == "jl" ||
      mode.startsWith("python"))
  ) {
    if (mode == "sage") {
      // it's nice for users to be able to type "sage" to get sage mode (since it's .sage file),
      // but the language for the sage kernels is always "sagemath".
      return "sagemath";
    }
    if (mode == "jl") {
      // similar remark about julia as for sage above
      return "julia";
    }
    if (mode == "m") {
      return "octave";
    }
    return mode;
  }

  const spec = file_associations[mode];

  if (preferKernel) {
    if (spec?.opts.mode == "shell") {
      // there is usually a bash kernel installed
      return "bash";
    }
  }

  if (spec == null) {
    // the keys of file_associations is (mostly) just the filename extension.
    // It's nice to also support matching the mime type of a codemirror mode partly, in case
    // the extension isn't found.
    for (const ext in file_associations) {
      const cmmode = file_associations[ext].opts?.mode;
      if (cmmode != null) {
        if (
          cmmode == mode ||
          (cmmode.startsWith("text/x-") && cmmode == "text/x-" + mode)
        ) {
          return cmmode;
        }
      }
    }
  }

  return spec?.opts.mode ?? info; // if nothing in file associations, maybe info is the mode, e.g. "python".
}

// Remove a leading single or double quote from s and, if the last remaining
// character is also a quote, remove that too.  Strings that do not start with
// a quote are returned unchanged.
// @param s the possibly quoted string, e.g. 'python' or "sage-9.8"
// @returns s without its surrounding quotes
function stripQuotes(s: string): string {
  if (!s.startsWith("'") && !s.startsWith('"')) {
    return s;
  }
  return s.slice(1).replace(/['"]$/, "");
}

// Return the first word in the string s, where words are separated by whitespace or commas
// @param s the string to extract first word from
// @returns the first word in the string ("" if there is none)
function firstWord(s: string): string {
  // Skip leading whitespace (otherwise split yields an empty first element), remove
  // everything after the first comma, then split the remaining string at any
  // whitespace and take the first piece.
  return s.trimStart().replace(/,.*/, "").split(/\s+/)[0] ?? "";
}