Path: blob/master/src/packages/frontend/components/html-ssr.tsx
5808 views
/*
React component for rendering an HTML string.

- suitable for server side rendering (e.g., nextjs)
- parses and displays math using KaTeX
- sanitizes the HTML for XSS attacks, etc., so it is safe to display to users
- optionally transforms links

TODO: This should eventually completely replace ./html.tsx:
- syntax highlighting
- searching
- opens links in a new tab, or makes clicking anchor tags run a function
  instead of opening a new tab so can open internal cocalc links inside cocalc.
*/

import React from "react";
import htmlReactParser, {
  attributesToProps,
  domToReact,
  Element,
  Text,
} from "html-react-parser";
import sanitizeHtml from "sanitize-html";
import { useFileContext } from "@cocalc/frontend/lib/file-context";
import DefaultMath from "@cocalc/frontend/components/math/ssr";
import { MathJaxConfig } from "@cocalc/util/mathjax-config";
import { decodeHTML } from "entities";

// Attributes whose values are URLs and therefore get passed through urlTransform.
const URL_ATTRIBS = ["src", "href", "data"];

// Tags inside of which text must NOT be interpreted as math.
const MATH_SKIP_TAGS = new Set<string>(MathJaxConfig.tex2jax.skipTags);

// Only tag names of this shape are handed to React; see the replace callback.
const VALID_TAG_NAME = /^[a-zA-Z]+[0-9]?$/;

const HTML_OPEN = "<html>";
const HTML_CLOSE = "</html>";

/**
 * Strip a literal "<html>" ... "</html>" wrapper from the given string.
 *
 * Sage output formulas are wrapped in "<html>" for some stupid reason, which
 * probably originates with a ridiculous design choice that Tom Boothby or I
 * made in 2006 related to "wiki" formatting in Sage notebooks. If we don't
 * strip this, then htmlReactParser just deletes the whole document, since
 * html is not a valid tag inside the DOM. We do this in a really minimally
 * flexible way to reduce the chances to 0 that we apply this when we
 * shouldn't.
 *
 * The closing tag is only removed when it is actually present, so input that
 * starts with "<html>" but is not terminated by "</html>" does not lose its
 * last characters.
 *
 * @param value - raw HTML string
 * @returns value unchanged if it does not start with "<html>" (ignoring
 *   leading whitespace); otherwise the trimmed string without the wrapper.
 */
function stripHtmlWrapper(value: string): string {
  if (!value.trimStart().startsWith(HTML_OPEN)) {
    return value;
  }
  const trimmed = value.trim();
  const end = trimmed.endsWith(HTML_CLOSE)
    ? trimmed.length - HTML_CLOSE.length
    : trimmed.length;
  return trimmed.slice(HTML_OPEN.length, end);
}

/**
 * Render an HTML string as React elements.
 *
 * Behavior is configured by the surrounding file context (useFileContext):
 * - noSanitize: when falsy, the HTML is first run through sanitizeHtml.
 * - MathComponent: optional replacement for the default math renderer.
 * - AnchorTagComponent: optional replacement used for every <a> element.
 * - urlTransform: optional function applied to URL attributes (see
 *   URL_ATTRIBS) when the HTML was not sanitized.
 *
 * @param value - the HTML to render
 * @param style - optional CSS applied to the wrapping element
 * @param inline - if true wrap the result in a <span>, otherwise in a <div>
 */
export default function HTML({
  value,
  style,
  inline,
}: {
  value: string;
  style?: React.CSSProperties;
  inline?: boolean;
}) {
  const { urlTransform, AnchorTagComponent, noSanitize, MathComponent } =
    useFileContext();

  if (!noSanitize) {
    value = sanitizeHtml(value, {
      allowedTags: sanitizeHtml.defaults.allowedTags.concat(["img", "iframe"]),
      allowedAttributes: {
        ...sanitizeHtml.defaults.allowedAttributes,
        iframe: [
          "src",
          "width",
          "height",
          "title",
          "allow",
          "allowfullscreen",
          "referrerpolicy",
          "loading",
          "frameborder",
        ],
      },
      allowedIframeHostnames: [
        "www.youtube.com",
        "youtube.com",
        "www.youtube-nocookie.com",
        "youtube-nocookie.com",
        "player.vimeo.com",
      ],
    });
  }

  value = stripHtmlWrapper(value);

  // Render text as math using the component from the context if there is
  // one, and the default SSR math component otherwise.
  const renderMath = (data: string) =>
    MathComponent != null ? (
      <MathComponent data={data} />
    ) : (
      <DefaultMath data={data} />
    );

  // The options object refers to itself (domToReact is called with the same
  // options so nested nodes get the same treatment), hence it is built in
  // two steps.
  const options: any = {};
  options.replace = (domNode) => {
    // NOTE(review): nodes without a `name` (presumably Text nodes) appear to
    // pass this test because `undefined` is stringified before matching --
    // confirm before tightening the regular expression.
    if (!VALID_TAG_NAME.test(domNode.name)) {
      // Without this, if user gives html input that is a malformed tag then all of React
      // completely crashes, which is not desirable for us. On the other hand, I prefer not
      // to always completely sanitize input, since that can do a lot we don't want to do
      // and may be expensive. See
      //   https://github.com/remarkablemark/html-react-parser/issues/60#issuecomment-398588573
      return React.createElement(React.Fragment);
    }

    if (domNode instanceof Text) {
      if (hasAncestor(domNode, MATH_SKIP_TAGS)) {
        // Do NOT convert Text to math inside a pre/code tree environment.
        return;
      }
      return renderMath(decodeHTML(domNode.data));
    }

    try {
      if (!(domNode instanceof Element)) return;
      const { name, children, attribs } = domNode;

      if (name == "script") {
        // <script type="math/tex"> holds raw TeX; wrap it in $...$ (or
        // $$...$$ when the type mentions "display") and render it as math.
        const type = attribs?.type?.toLowerCase();
        if (type?.startsWith("math/tex")) {
          const child = children?.[0];
          if (child instanceof Text && child.data) {
            const delimiter = type.includes("display") ? "$$" : "$";
            return renderMath(delimiter + decodeHTML(child.data) + delimiter);
          }
        }
      }

      if (AnchorTagComponent != null && name == "a") {
        return (
          <AnchorTagComponent {...attribs}>
            {domToReact(children as any, options)}
          </AnchorTagComponent>
        );
      }

      if (noSanitize && urlTransform != null && attribs != null) {
        // since we did not sanitize the HTML (which also does urlTransform),
        // we have to do the urlTransform here instead.
        // NOTE(review): the sanitizeHtml options above do not visibly apply
        // urlTransform, so in the sanitized case URLs may not be transformed
        // at all -- confirm whether that is intended.
        for (const attrib of URL_ATTRIBS) {
          const original = attribs[attrib];
          if (original == null) continue;
          const transformed = urlTransform(original);
          if (transformed == null) continue;
          // Only the first URL attribute that actually gets transformed is
          // rewritten; the element is then rebuilt with the new value.
          const props = attributesToProps(attribs);
          props[attrib] = transformed;
          return React.createElement(
            name,
            props,
            children != null && children.length > 0
              ? domToReact(children as any, options)
              : undefined,
          );
        }
      }
    } catch (err) {
      // Best effort: a node we fail to process is left for the parser's
      // default handling rather than breaking the whole render.
      console.log("WARNING -- issue parsing HTML", err);
    }
  };

  const content = htmlReactParser(value, options);
  if (inline) {
    return <span style={style}>{content}</span>;
  } else {
    return <div style={style}>{content}</div>;
  }
}

/**
 * Check whether any ancestor of the given node is an Element whose tag name
 * is in `tags`. The walk stops at the first parent that is not an Element.
 *
 * @param domNode - node whose ancestors are inspected (the node itself is not)
 * @param tags - tag names to look for
 * @returns true if some ancestor Element has a name contained in tags
 */
function hasAncestor(domNode, tags: Set<string>): boolean {
  let parent = domNode.parent;
  while (parent instanceof Element) {
    if (tags.has(parent.name)) return true;
    parent = parent.parent;
  }
  return false;
}