Path: blob/master/support/startup/PhabricatorStartup.php
12240 views
<?php

/**
 * Handle request startup, before loading the environment or libraries. This
 * class bootstraps the request state up to the point where we can enter
 * Phabricator code.
 *
 * NOTE: This class MUST NOT have any dependencies. It runs before libraries
 * load.
 *
 * Rate Limiting
 * =============
 *
 * Phabricator limits the rate at which clients can request pages, and issues
 * HTTP 429 "Too Many Requests" responses if clients request too many pages too
 * quickly. Although this is not a complete defense against high-volume attacks,
 * it can protect an install against aggressive crawlers, security scanners,
 * and some types of malicious activity.
 *
 * To perform rate limiting, each page increments a score counter for the
 * requesting user's IP. The page can give the IP more points for an expensive
 * request, or fewer for an authenticated request.
 *
 * Score counters are kept in buckets, and writes move to a new bucket every
 * minute. After a few minutes (defined by @{method:getRateLimitBucketCount}),
 * the oldest bucket is discarded. This provides a simple mechanism for keeping
 * track of scores without needing to store, access, or read very much data.
 *
 * Users are allowed to accumulate up to 1000 points per minute, averaged across
 * all of the tracked buckets.
 *
 * @task info Accessing Request Information
 * @task hook Startup Hooks
 * @task apocalypse In Case Of Apocalypse
 * @task validation Validation
 * @task ratelimit Rate Limiting
 * @task phases Startup Phase Timers
 * @task request-path Request Path
 */
final class PhabricatorStartup {

  private static $startTime;
  private static $debugTimeLimit;
  private static $accessLog;
  private static $capturingOutput;
  private static $rawInput;
  private static $oldMemoryLimit;
  private static $phases;

  private static $limits = array();
  private static $requestPath;


/* -(  Accessing Request Information  )-------------------------------------- */


  /**
   * Get the request start time recorded by @{method:didStartup}.
   *
   * @return float|null Start time, or `null` before startup has run.
   * @task info
   */
  public static function getStartTime() {
    return self::$startTime;
  }


  /**
   * Get the number of microseconds elapsed since the request started.
   *
   * @return int Elapsed microseconds.
   * @task info
   */
  public static function getMicrosecondsSinceStart() {
    // This is the same as "phutil_microseconds_since()", but we may not have
    // loaded libraries yet.
    return (int)(1000000 * (microtime(true) - self::getStartTime()));
  }


  /**
   * Provide the access log which @{method:didFatal} writes to.
   *
   * @param wild Access log object, or `null` to clear it.
   * @return void
   * @task info
   */
  public static function setAccessLog($access_log) {
    self::$accessLog = $access_log;
  }


  /**
   * Read the complete request body, caching the result for later calls.
   *
   * The body is read through an `AphrontRequestStream`. If the
   * `HTTP_CONTENT_ENCODING` server variable is present, its trimmed value is
   * handed to the stream with `setEncoding()` before reading.
   *
   * @return string Request body.
   * @task info
   */
  public static function getRawInput() {
    if (self::$rawInput === null) {
      $stream = new AphrontRequestStream();

      if (isset($_SERVER['HTTP_CONTENT_ENCODING'])) {
        $encoding = trim($_SERVER['HTTP_CONTENT_ENCODING']);
        $stream->setEncoding($encoding);
      }

      // Drain the stream: "readData()" returns null once it is exhausted.
      $input = '';
      do {
        $bytes = $stream->readData();
        if ($bytes === null) {
          break;
        }
        $input .= $bytes;
      } while (true);

      self::$rawInput = $input;
    }

    return self::$rawInput;
  }


/* -(  Startup Hooks  )------------------------------------------------------ */


  /**
   * Initialize request state: configure and verify PHP, apply rate limits,
   * normalize input, locate the request path, and begin capturing output.
   *
   * @param float Request start time, from `microtime(true)`.
   * @return void
   * @task hook
   */
  public static function didStartup($start_time) {
    self::$startTime = $start_time;

    self::$phases = array();

    self::$accessLog = null;
    self::$requestPath = null;

    static $registered;
    if (!$registered) {
      // NOTE: This protects us against multiple calls to didStartup() in the
      // same request, but also against repeated requests to the same
      // interpreter state, which we may implement in the future.
      register_shutdown_function(array(__CLASS__, 'didShutdown'));
      $registered = true;
    }

    self::setupPHP();
    self::verifyPHP();

    // If we've made it this far, the environment isn't completely broken so
    // we can switch over to relying on our own exception recovery mechanisms.
    ini_set('display_errors', 0);

    self::connectRateLimits();

    self::normalizeInput();

    self::readRequestPath();

    self::beginOutputCapture();
  }


  /**
   * Shutdown handler. Disconnects rate limits, then reports the last error
   * through @{method:didFatal} if it was an `E_ERROR`, `E_PARSE` or
   * `E_COMPILE_ERROR`.
   *
   * @return void
   * @task hook
   */
  public static function didShutdown() {
    // Disconnect any active rate limits before we shut down. If we don't do
    // this, requests which exit early will lock a slot in any active
    // connection limits, and won't count for rate limits.
    self::disconnectRateLimits(array());

    $event = error_get_last();

    if (!$event) {
      return;
    }

    switch ($event['type']) {
      case E_ERROR:
      case E_PARSE:
      case E_COMPILE_ERROR:
        break;
      default:
        return;
    }

    $msg = ">>> UNRECOVERABLE FATAL ERROR <<<\n\n";

    // Even though we should be emitting this as text-plain, escape things
    // just to be sure since we can't really be sure what the program state
    // is when we get here.
    $msg .= htmlspecialchars(
      $event['message']."\n\n".$event['file'].':'.$event['line'],
      ENT_QUOTES,
      'UTF-8');

    // flip dem tables
    $msg .= "\n\n\n";
    $msg .= "\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb\x20\xef\xb8\xb5\x20\xc2\xaf".
            "\x5c\x5f\x28\xe3\x83\x84\x29\x5f\x2f\xc2\xaf\x20\xef\xb8\xb5\x20".
            "\xe2\x94\xbb\xe2\x94\x81\xe2\x94\xbb";

    self::didFatal($msg);
  }

  /**
   * Load the "Arcanist" library, then load Phabricator's own library from an
   * absolute path.
   *
   * The directory containing "phabricator/" is prepended to the include path.
   * If `$_SERVER['PHUTIL_LIBRARY_ROOT']` is non-empty, it is used as a prefix
   * for the Arcanist include. Fatals through @{method:didFatal} if the include
   * fails.
   *
   * @return void
   * @task hook
   */
  public static function loadCoreLibraries() {
    $phabricator_root = dirname(dirname(dirname(__FILE__)));
    $libraries_root = dirname($phabricator_root);

    $root = null;
    if (!empty($_SERVER['PHUTIL_LIBRARY_ROOT'])) {
      $root = $_SERVER['PHUTIL_LIBRARY_ROOT'];
    }

    ini_set(
      'include_path',
      $libraries_root.PATH_SEPARATOR.ini_get('include_path'));

    $ok = @include_once $root.'arcanist/src/init/init-library.php';
    if (!$ok) {
      self::didFatal(
        'Unable to load the "Arcanist" library. Put "arcanist/" next to '.
        '"phabricator/" on disk.');
    }

    // Load Phabricator itself using the absolute path, so we never end up doing
    // anything surprising (loading index.php and libraries from different
    // directories).
    phutil_load_library($phabricator_root.'/src');
  }

/* -(  Output Capture  )----------------------------------------------------- */


  /**
   * Start buffering output with `ob_start()`.
   *
   * Fatals through @{method:didFatal} if output is already being captured.
   *
   * @return void
   */
  public static function beginOutputCapture() {
    if (self::$capturingOutput) {
      self::didFatal('Already capturing output!');
    }
    self::$capturingOutput = true;
    ob_start();
  }


  /**
   * Stop capturing output and return whatever was buffered.
   *
   * @return string|null Result of `ob_get_clean()`, or `null` if output was
   *   not being captured.
   */
  public static function endOutputCapture() {
    if (!self::$capturingOutput) {
      return null;
    }
    self::$capturingOutput = false;
    return ob_get_clean();
  }


/* -(  Debug Time Limit  )--------------------------------------------------- */


  /**
   * Set a time limit (in seconds) for the current script. After time expires,
   * the script fatals.
   *
   * This works like `max_execution_time`, but prints out a useful stack trace
   * when the time limit expires. This is primarily intended to make it easier
   * to debug pages which hang by allowing extraction of a stack trace: set a
   * short debug limit, then use the trace to figure out what's happening.
   *
   * The limit is implemented with a tick function, so enabling it implies
   * some accounting overhead.
   *
   * @param int Time limit in seconds.
   * @return void
   */
  public static function setDebugTimeLimit($limit) {
    self::$debugTimeLimit = $limit;

    static $initialized;
    if (!$initialized) {
      declare(ticks=1);
      register_tick_function(array(__CLASS__, 'onDebugTick'));

      // Remember that the tick function is installed, so adjusting the limit
      // later does not register a duplicate callback.
      $initialized = true;
    }
  }


  /**
   * Callback tick function used by @{method:setDebugTimeLimit}.
   *
   * Fatals with a useful stack trace after the time limit expires.
   *
   * @return void
   */
  public static function onDebugTick() {
    $limit = self::$debugTimeLimit;
    if (!$limit) {
      return;
    }

    $elapsed = (microtime(true) - self::getStartTime());
    if ($elapsed > $limit) {
      $frames = array();
      foreach (debug_backtrace() as $frame) {
        $file = isset($frame['file']) ? $frame['file'] : '-';
        $file = basename($file);

        $line = isset($frame['line']) ? $frame['line'] : '-';
        $class = isset($frame['class']) ? $frame['class'].'->' : null;
        $func = isset($frame['function']) ? $frame['function'].'()' : '?';

        $frames[] = "{$file}:{$line} {$class}{$func}";
      }

      self::didFatal(
        "Request aborted by debug time limit after {$limit} seconds.\n\n".
        "STACK TRACE\n".
        implode("\n", $frames));
    }
  }


/* -(  In Case of Apocalypse  )---------------------------------------------- */


  /**
   * Fatal the request completely in response to an exception, sending a plain
   * text message to the client. Calls @{method:didFatal} internally.
   *
   * @param   string    Brief description of the exception context, like
   *                    `"Rendering Exception"`.
   * @param   Throwable The exception itself.
   * @param   bool      True if it's okay to show the exception's stack trace
   *                    to the user. The trace will always be logged.
   * @return  exit      This method **does not return**.
   *
   * @task apocalypse
   */
  public static function didEncounterFatalException(
    $note,
    $ex,
    $show_trace) {

    $message = '['.$note.'/'.get_class($ex).'] '.$ex->getMessage();

    $full_message = $message;
    $full_message .= "\n\n";
    $full_message .= $ex->getTraceAsString();

    if ($show_trace) {
      $message = $full_message;
    }

    self::didFatal($message, $full_message);
  }


  /**
   * Fatal the request completely, sending a plain text message to the client.
   *
   * @param   string  Plain text message to send to the client.
   * @param   string  Plain text message to send to the error log. If not
   *                  provided, the client message is used. You can pass a more
   *                  detailed message here (e.g., with stack traces) to avoid
   *                  showing it to users.
   * @return  exit    This method **does not return**.
   *
   * @task apocalypse
   */
  public static function didFatal($message, $log_message = null) {
    if ($log_message === null) {
      $log_message = $message;
    }

    // Discard anything captured so far so only the fatal message is sent.
    self::endOutputCapture();
    $access_log = self::$accessLog;
    if ($access_log) {
      // We may end up here before the access log is initialized, e.g. from
      // verifyPHP().
      $access_log->setData(
        array(
          'c' => 500,
        ));
      $access_log->write();
    }

    header(
      'Content-Type: text/plain; charset=utf-8',
      $replace = true,
      $http_error = 500);

    error_log($log_message);
    echo $message."\n";

    exit(1);
  }


/* -(  Validation  )--------------------------------------------------------- */


  /**
   * Apply baseline PHP runtime configuration: error reporting, memory limit,
   * libxml entity loading, locale, and a small set of ini overrides.
   *
   * The previous `memory_limit` is saved and can be read back with
   * @{method:getOldMemoryLimit}.
   *
   * @return void
   * @task validation
   */
  private static function setupPHP() {
    error_reporting(E_ALL | E_STRICT);
    self::$oldMemoryLimit = ini_get('memory_limit');
    ini_set('memory_limit', -1);

    // If we have libxml, disable the incredibly dangerous entity loader.
    // PHP 8 deprecates this function and disables this by default; remove once
    // PHP 7 is no longer supported or a future version has removed the function
    // entirely.
    if (function_exists('libxml_disable_entity_loader')) {
      @libxml_disable_entity_loader(true);
    }

    // See T13060. If the locale for this process (the parent process) is not
    // a UTF-8 locale we can encounter problems when launching subprocesses
    // which receive UTF-8 parameters in their command line argument list.
    @setlocale(LC_ALL, 'en_US.UTF-8');

    $config_map = array(
      // See PHI1894. Keep "args" in exception backtraces.
      'zend.exception_ignore_args' => 0,

      // See T13100. We'd like the regex engine to fail, rather than segfault,
      // if handed a pathological regular expression.
      'pcre.backtrack_limit' => 10000,
      'pcre.recursion_limit' => 10000,

      // NOTE: Arcanist applies a similar set of startup options for CLI
      // environments in "init-script.php". Changes here may also be
      // appropriate to apply there.
    );

    foreach ($config_map as $config_key => $config_value) {
      ini_set($config_key, $config_value);
    }
  }


  /**
   * Get the `memory_limit` value that was in effect before
   * @{method:setupPHP} replaced it.
   *
   * @return string|null Saved ini value, or `null` if setup has not run.
   * @task validation
   */
  public static function getOldMemoryLimit() {
    return self::$oldMemoryLimit;
  }

  /**
   * Merge unfiltered copies of the GET, ENV and COOKIE inputs into their
   * superglobals, then rebuild `$_REQUEST`.
   *
   * @return void
   * @task validation
   */
  private static function normalizeInput() {
    // Replace superglobals with unfiltered versions, disrespect php.ini (we
    // filter ourselves).

    // NOTE: We don't filter INPUT_SERVER because we don't want to overwrite
    // changes made in "preamble.php".

    // NOTE: We don't filter INPUT_POST because we may be constructing it
    // lazily if "enable_post_data_reading" is disabled.

    $filter = array(
      INPUT_GET,
      INPUT_ENV,
      INPUT_COOKIE,
    );
    foreach ($filter as $type) {
      $filtered = filter_input_array($type, FILTER_UNSAFE_RAW);
      if (!is_array($filtered)) {
        continue;
      }
      switch ($type) {
        case INPUT_GET:
          $_GET = array_merge($_GET, $filtered);
          break;
        case INPUT_COOKIE:
          $_COOKIE = array_merge($_COOKIE, $filtered);
          break;
        case INPUT_ENV:
          $env = array_merge($_ENV, $filtered);
          $_ENV = self::filterEnvSuperglobal($env);
          break;
      }
    }

    self::rebuildRequest();
  }

  /**
   * Rebuild `$_REQUEST` from `$_GET`, `$_POST` and `$_COOKIE`, in the order
   * given by the `request_order` ini setting (falling back to
   * `variables_order`).
   *
   * If neither setting has a value, `$_REQUEST` is left untouched.
   *
   * @return void
   * @task validation
   */
  public static function rebuildRequest() {
    // Rebuild $_REQUEST, respecting order declared in ".ini" files.
    $order = ini_get('request_order');

    if (!$order) {
      $order = ini_get('variables_order');
    }

    if (!$order) {
      // $_REQUEST will be empty, so leave it alone.
      return;
    }

    $_REQUEST = array();
    for ($ii = 0; $ii < strlen($order); $ii++) {
      switch ($order[$ii]) {
        case 'G':
          $_REQUEST = array_merge($_REQUEST, $_GET);
          break;
        case 'P':
          $_REQUEST = array_merge($_REQUEST, $_POST);
          break;
        case 'C':
          $_REQUEST = array_merge($_REQUEST, $_COOKIE);
          break;
        default:
          // $_ENV and $_SERVER never go into $_REQUEST.
          break;
      }
    }
  }


  /**
   * Adjust `$_ENV` before execution.
   *
   * Adjustments here primarily impact the environment as seen by subprocesses.
   * The environment is forwarded explicitly by @{class:ExecFuture}.
   *
   * @param map<string, wild> Input `$_ENV`.
   * @return map<string, string> Suitable `$_ENV`.
   * @task validation
   */
  private static function filterEnvSuperglobal(array $env) {

    // In some configurations, we may get "argc" and "argv" set in $_ENV.
    // These are not real environmental variables, and "argv" may have an array
    // value which can not be forwarded to subprocesses. Remove these from the
    // environment if they are present.
    unset($env['argc']);
    unset($env['argv']);

    return $env;
  }


  /**
   * Check that the PHP runtime is usable, and fatal with an explanation if it
   * is not.
   *
   * Rejects PHP versions older than the minimum, `magic_quotes_gpc`, known-bad
   * APC versions, and requests which carry an `HTTP_PROXY` server variable.
   *
   * @return void
   * @task validation
   */
  private static function verifyPHP() {
    $required_version = '5.2.3';
    if (version_compare(PHP_VERSION, $required_version) < 0) {
      self::didFatal(
        "You are running PHP version '".PHP_VERSION."', which is older than ".
        "the minimum version, '{$required_version}'. Update to at least ".
        "'{$required_version}'.");
    }

    if (function_exists('get_magic_quotes_gpc')) {
      if (@get_magic_quotes_gpc()) {
        self::didFatal(
          'Your server is configured with the PHP language feature '.
          '"magic_quotes_gpc" enabled.'.
          "\n\n".
          'This feature is "highly discouraged" by PHP\'s developers, and '.
          'has been removed entirely in PHP8.'.
          "\n\n".
          'You must disable "magic_quotes_gpc" to run Phabricator. Consult '.
          'the PHP manual for instructions.');
      }
    }

    if (extension_loaded('apc')) {
      $apc_version = phpversion('apc');
      $known_bad = array(
        '3.1.14' => true,
        '3.1.15' => true,
        '3.1.15-dev' => true,
      );
      if (isset($known_bad[$apc_version])) {
        self::didFatal(
          "You have APC {$apc_version} installed. This version of APC is ".
          "known to be bad, and does not work with Phabricator (it will ".
          "cause Phabricator to fatal unrecoverably with nonsense errors). ".
          "Downgrade to version 3.1.13.");
      }
    }

    if (isset($_SERVER['HTTP_PROXY'])) {
      self::didFatal(
        'This HTTP request included a "Proxy:" header, poisoning the '.
        'environment (CVE-2016-5385 / httpoxy). Declining to process this '.
        'request. For details, see: https://phurl.io/u/httpoxy');
    }
  }


  /**
   * Locate the request path and store it with @{method:setRequestPath}.
   *
   * Fatals through @{method:didFatal} if no path can be found.
   *
   * @return void
   * @task request-path
   */
  private static function readRequestPath() {

    // See T13575. The request path may be provided in:
    //
    //  - the "$_GET" parameter "__path__" (normal for Apache and nginx); or
    //  - the "$_SERVER" parameter "REQUEST_URI" (normal for the PHP builtin
    //    webserver).
    //
    // Locate it wherever it is, and store it for later use. Note that writing
    // to "$_REQUEST" here won't always work, because later code may rebuild
    // "$_REQUEST" from other sources.

    if (isset($_REQUEST['__path__']) && strlen($_REQUEST['__path__'])) {
      self::setRequestPath($_REQUEST['__path__']);
      return;
    }

    // Compatibility with PHP 5.4+ built-in web server.
    if (php_sapi_name() == 'cli-server') {
      $path = parse_url($_SERVER['REQUEST_URI']);
      self::setRequestPath($path['path']);
      return;
    }

    if (!isset($_REQUEST['__path__'])) {
      self::didFatal(
        "Request parameter '__path__' is not set. Your rewrite rules ".
        "are not configured correctly.");
    }

    if (!strlen($_REQUEST['__path__'])) {
      self::didFatal(
        "Request parameter '__path__' is set, but empty. Your rewrite rules ".
        "are not configured correctly. The '__path__' should always ".
        "begin with a '/'.");
    }
  }

  /**
   * Get the request path stored by @{method:setRequestPath}.
   *
   * Fatals through @{method:didFatal} if no path has been stored.
   *
   * @return string Request path.
   * @task request-path
   */
  public static function getRequestPath() {
    $path = self::$requestPath;

    if ($path === null) {
      self::didFatal(
        'Request attempted to access request path, but no request path is '.
        'available for this request. You may be calling web request code '.
        'from a non-request context, or your webserver may not be passing '.
        'a request path to Phabricator in a format that it understands.');
    }

    return $path;
  }

  /**
   * Store the request path for later retrieval with
   * @{method:getRequestPath}.
   *
   * @param string Request path.
   * @return void
   * @task request-path
   */
  public static function setRequestPath($path) {
    self::$requestPath = $path;
  }


/* -(  Rate Limiting  )------------------------------------------------------ */


  /**
   * Add a new client limit.
   *
   * @param PhabricatorClientLimit New limit.
   * @return PhabricatorClientLimit The limit.
   * @task ratelimit
   */
  public static function addRateLimit(PhabricatorClientLimit $limit) {
    self::$limits[] = $limit;
    return $limit;
  }


  /**
   * Apply configured rate limits.
   *
   * If any limit is exceeded, this method terminates the request.
   *
   * @return void
   * @task ratelimit
   */
  private static function connectRateLimits() {
    $limits = self::$limits;

    // Connect limits in order, stopping at the first one which returns a
    // non-null reason.
    $reason = null;
    $connected = array();
    foreach ($limits as $limit) {
      $reason = $limit->didConnect();
      $connected[] = $limit;
      if ($reason !== null) {
        break;
      }
    }

    // If we're killing the request here, disconnect any limits that we
    // connected to try to keep the accounting straight.
    if ($reason !== null) {
      foreach ($connected as $limit) {
        $limit->didDisconnect(array());
      }

      self::didRateLimit($reason);
    }
  }


  /**
   * Tear down rate limiting and allow limits to score the request.
   *
   * @param map<string, wild> Additional, freeform request state.
   * @return void
   * @task ratelimit
   */
  public static function disconnectRateLimits(array $request_state) {
    $limits = self::$limits;

    // Remove all limits before disconnecting them so this works properly if
    // it runs twice. (We run this automatically as a shutdown handler.)
    self::$limits = array();

    foreach ($limits as $limit) {
      $limit->didDisconnect($request_state);
    }
  }



  /**
   * Emit an HTTP 429 "Too Many Requests" response (indicating that the user
   * has exceeded application rate limits) and exit.
   *
   * @param string Reason for the limit (as returned by `didConnect()`), which
   *   is echoed as the response body.
   * @return exit This method **does not return**.
   * @task ratelimit
   */
  private static function didRateLimit($reason) {
    header(
      'Content-Type: text/plain; charset=utf-8',
      $replace = true,
      $http_error = 429);

    echo $reason;

    exit(1);
  }


/* -(  Startup Timers  )----------------------------------------------------- */


  /**
   * Record the beginning of a new startup phase.
   *
   * For phases which occur before @{class:PhabricatorStartup} loads, save the
   * time and record it with @{method:recordStartupPhase} after the class is
   * available.
   *
   * @param string Phase name.
   * @return void
   * @task phases
   */
  public static function beginStartupPhase($phase) {
    self::recordStartupPhase($phase, microtime(true));
  }


  /**
   * Record the start time of a previously executed startup phase.
   *
   * For startup phases which occur after @{class:PhabricatorStartup} loads,
   * use @{method:beginStartupPhase} instead. This method can be used to
   * record a time before the class loads, then hand it over once the class
   * becomes available.
   *
   * @param string Phase name.
   * @param float Phase start time, from `microtime(true)`.
   * @return void
   * @task phases
   */
  public static function recordStartupPhase($phase, $time) {
    self::$phases[$phase] = $time;
  }


  /**
   * Get information about startup phase timings.
   *
   * Sometimes, performance problems can occur before we start the profiler.
   * Since the profiler can't examine these phases, it isn't useful in
   * understanding their performance costs.
   *
   * Instead, the startup process marks when it enters various phases using
   * @{method:beginStartupPhase}. A later call to this method can retrieve this
   * information, which can be examined to gain greater insight into where
   * time was spent. The output is still crude, but better than nothing.
   *
   * @return map<string, float> Map from phase name to phase start time.
   * @task phases
   */
  public static function getPhases() {
    return self::$phases;
  }

}