<?php
/**
* @defgroup core Core
* Core web application concerns such as routing, dispatching, etc.
*/
/**
* @file classes/core/Core.php
*
* Copyright (c) 2014-2021 Simon Fraser University
* Copyright (c) 2000-2021 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class Core
*
* @ingroup core
*
* @brief Class containing system-wide functions.
*/
namespace PKP\core;
use Exception;
use PKP\cache\CacheManager;
use PKP\cache\FileCache;
use PKP\config\Config;
use SplFileInfo;
define('PKP_LIB_PATH', 'lib/pkp');
define('COUNTER_USER_AGENTS_FILE', Core::getBaseDir() . '/' . PKP_LIB_PATH . '/lib/counterBots/generated/COUNTER_Robots_list.txt');
class Core
{
/** @var array The regular expressions that will find a bot user agent */
public static $botRegexps = [];
/**
* Get the path to the base installation directory.
*
* @return string
*/
public static function getBaseDir()
{
static $baseDir;
return $baseDir ??= dirname(INDEX_FILE_LOCATION);
}
/**
* Sanitize a value to be used in a file path.
* Removes any characters except alphanumeric characters, underscores, and dashes.
*
* @param string $var
*
* @return string
*/
public static function cleanFileVar($var)
{
return cleanFileVar($var);
}
/**
* Return the current date in ISO (YYYY-MM-DD HH:MM:SS) format.
*
* @param int $ts optional, use specified timestamp instead of current time
*
* @return string
*/
public static function getCurrentDate($ts = null)
{
return date('Y-m-d H:i:s', $ts ?? time());
}
/**
* Return *nix timestamp with microseconds (in units of seconds).
*
* @return float
*/
public static function microtime()
{
[$usec, $sec] = explode(' ', microtime());
return (float)$sec + (float)$usec;
}
/**
* Check if the server platform is Windows.
*
* @return bool
*/
public static function isWindows()
{
return strtolower_codesafe(substr(PHP_OS, 0, 3)) == 'win';
}
/**
* Checks to see if a PHP module is enabled.
*
* @param string $moduleName
*
* @return bool
*/
public static function checkGeneralPHPModule($moduleName)
{
if (extension_loaded($moduleName)) {
return true;
}
return false;
}
/**
* Check the passed user agent for a bot.
*
* @param string $userAgent
* @param string $botRegexpsFile An alternative file with regular
* expressions to find bots inside user agent strings.
*
* @return bool
*/
public static function isUserAgentBot($userAgent, $botRegexpsFile = COUNTER_USER_AGENTS_FILE)
{
static $botRegexps;
Registry::set('currentUserAgentsFile', $botRegexpsFile);
if (!isset($botRegexps[$botRegexpsFile])) {
$botFileCacheId = md5($botRegexpsFile);
$cacheManager = CacheManager::getManager();
/** @var FileCache */
$cache = $cacheManager->getCache('core', $botFileCacheId, ['Core', '_botFileListCacheMiss'], CACHE_TYPE_FILE);
$botRegexps[$botRegexpsFile] = $cache->getContents();
}
foreach ($botRegexps[$botRegexpsFile] as $regexp) {
// make the search case insensitive
$regexp .= 'i';
if (PKPString::regexp_match($regexp, $userAgent)) {
return true;
}
}
return false;
}
/**
* Get context path present into the passed
* url information.
*
* @param string $urlInfo Full url or just path info.
*/
public static function getContextPath(string $urlInfo): string
{
$contextPaths = explode('/', trim($urlInfo, '/'), 2);
return self::cleanFileVar($contextPaths[0] ?: 'index');
}
/**
* Get the page present into
* the passed url information. It expects that urls
* were built using the system.
*
* @param string $urlInfo Full url or just path info.
* @param bool $isPathInfo Tell if the
* passed url info string is a path info or not.
* @param array $userVars (optional) Pass GET variables
* if needed (for testing only).
*
* @return string
*/
public static function getPage($urlInfo, $isPathInfo, $userVars = [])
{
$page = Core::_getUrlComponents($urlInfo, $isPathInfo, 0, 'page', $userVars);
return Core::cleanFileVar(is_null($page) ? '' : $page);
}
/**
* Get the operation present into
* the passed url information. It expects that urls
* were built using the system.
*
* @param string $urlInfo Full url or just path info.
* @param bool $isPathInfo Tell if the
* passed url info string is a path info or not.
* @param array $userVars (optional) Pass GET variables
* if needed (for testing only).
*
* @return string
*/
public static function getOp($urlInfo, $isPathInfo, $userVars = [])
{
$operation = Core::_getUrlComponents($urlInfo, $isPathInfo, 1, 'op', $userVars);
return Core::cleanFileVar(empty($operation) ? 'index' : $operation);
}
/**
* Get the arguments present into
* the passed url information (not GET/POST arguments,
* only arguments appended to the URL separated by "/").
* It expects that urls were built using the system.
*
* @param string $urlInfo Full url or just path info.
* @param bool $isPathInfo Tell if the
* passed url info string is a path info or not.
* @param array $userVars (optional) Pass GET variables
* if needed (for testing only).
*
* @return array
*/
public static function getArgs($urlInfo, $isPathInfo, $userVars = [])
{
return Core::_getUrlComponents($urlInfo, $isPathInfo, 2, 'path', $userVars);
}
/**
* Remove base url from the passed url, if any.
* Also, if true, checks for the context path in
* url and if it's missing, tries to add it.
*
* @param string $url
*
* @return string|bool The url without base url,
* false if it was not possible to remove it.
*/
public static function removeBaseUrl($url)
{
[$baseUrl, $contextPath] = Core::_getBaseUrlAndPath($url);
if (!$baseUrl) {
return false;
}
// Remove base url from url, if any.
$url = str_replace($baseUrl, '', $url);
// If url doesn't have the entire protocol and host part,
// remove any possible base url path from url.
$baseUrlPath = parse_url($baseUrl, PHP_URL_PATH);
if ($baseUrlPath == $url) {
// Access to the base url, no context, the entire
// url is part of the base url and we can return empty.
$url = '';
} else {
// Handle case where index.php was removed by rewrite rules,
// and we have base url followed by the args.
if (strpos($url, $baseUrlPath . '?') === 0) {
$replacement = '?'; // Url path replacement.
$baseSystemEscapedPath = preg_quote($baseUrlPath . '?', '/');
} else {
$replacement = '/'; // Url path replacement.
$baseSystemEscapedPath = preg_quote($baseUrlPath . '/', '/');
}
$url = preg_replace('/^' . $baseSystemEscapedPath . '/', $replacement, $url);
// Remove possible index.php page from url.
$url = str_replace('/index.php', '', $url);
}
if ($contextPath) {
// We found the contextPath using the base_url
// config file settings. Check if the url starts
// with the context path, if not, prepend it.
if (strpos($url, '/' . $contextPath . '/') !== 0) {
$url = '/' . $contextPath . $url;
}
}
// Remove any possible trailing slashes.
$url = rtrim($url, '/');
return $url;
}
/**
* Try to get the base url and, if configuration
* is set to use base url override, context
* path for the passed url.
*
* @param string $url
*
* @return array With two elements, base url and context path.
*/
protected static function _getBaseUrlAndPath($url)
{
$baseUrl = false;
$contextPath = false;
// Check for override base url settings.
$contextBaseUrls = Config::getContextBaseUrls();
if (empty($contextBaseUrls)) {
$baseUrl = Config::getVar('general', 'base_url');
} else {
// We are just interested in context base urls, remove the index one.
if (isset($contextBaseUrls['index'])) {
unset($contextBaseUrls['index']);
}
// Arrange them in length order, so we make sure
// we get the correct one, in case there's an overlaping
// of contexts, eg.:
// base_url[context1] = http://somesite.com/
// base_url[context2] = http://somesite.com/context2
$sortedBaseUrls = array_combine($contextBaseUrls, array_map('strlen', $contextBaseUrls));
arsort($sortedBaseUrls);
foreach (array_keys($sortedBaseUrls) as $workingBaseUrl) {
$urlHost = parse_url($url, PHP_URL_HOST);
if (is_null($urlHost)) {
// Check the base url without the host part.
$baseUrlHost = parse_url($workingBaseUrl, PHP_URL_HOST);
if (is_null($baseUrlHost)) {
break;
}
$baseUrlToSearch = substr($workingBaseUrl, strpos($workingBaseUrl, $baseUrlHost) + strlen($baseUrlHost));
// Base url with only host part, add trailing slash
// so it can be checked below.
if (!$baseUrlToSearch) {
$baseUrlToSearch = '/';
}
} else {
$baseUrlToSearch = $workingBaseUrl;
}
$baseUrlCheck = Core::_checkBaseUrl($baseUrlToSearch, $url);
if (is_null($baseUrlCheck)) {
// Can't decide. Stop searching.
break;
} elseif ($baseUrlCheck === true) {
$contextPath = array_search($workingBaseUrl, $contextBaseUrls);
$baseUrl = $workingBaseUrl;
break;
}
}
}
// If we still have no base URL, this may be a situation where we have an install with some customized URLs, and some not.
// Return the default base URL.
if (!$baseUrl) {
$baseUrl = Config::getVar('general', 'base_url');
}
return [$baseUrl, $contextPath];
}
/**
* Check if the passed base url is part of
* the passed url, based on the context base url
* configuration. Both parameters can represent
* full url (host plus path) or just the path,
* but they have to be consistent.
*
* @param string $baseUrl Full base url
* or just it's path info.
* @param string $url Full url or just it's
* path info.
*
* @return ?bool
*/
protected static function _checkBaseUrl($baseUrl, $url)
{
// Check if both base url and url have host
// component or not.
$baseUrlHasHost = (bool) parse_url($baseUrl, PHP_URL_HOST);
$urlHasHost = (bool) parse_url($url, PHP_URL_HOST);
if ($baseUrlHasHost !== $urlHasHost) {
return false;
}
$contextBaseUrls = & Config::getContextBaseUrls();
// If the base url is found inside the passed url,
// then we might found the right context path.
if (strpos($url, $baseUrl) === 0) {
if (strpos($url, '/index.php') == strlen($baseUrl) - 1) {
// index.php appears right after the base url,
// no more possible paths.
return true;
} else {
// Still have to check if there is no other context
// base url that combined with it's context path is
// equal to this base url. If it exists, we can't
// tell which base url is contained in url.
foreach ($contextBaseUrls as $contextPath => $workingBaseUrl) {
$urlToCheck = $workingBaseUrl . '/' . $contextPath;
if (!$baseUrlHasHost) {
$urlToCheck = parse_url($urlToCheck, PHP_URL_PATH);
}
if ($baseUrl == $urlToCheck) {
return null;
}
}
return true;
}
}
return false;
}
/**
* Bot list file cache miss fallback.
* (WARNING: This function appears to be used externally, hence public despite _ prefix.)
*
* @param FileCache $cache
*
* @return array
*/
public static function _botFileListCacheMiss($cache)
{
$id = $cache->getCacheId();
$filteredBotRegexps = array_filter(
file(Registry::get('currentUserAgentsFile')),
function ($regexp) {
$regexp = trim($regexp);
return !empty($regexp) && $regexp[0] != '#';
}
);
$botRegexps = array_map(
function ($regexp) {
$delimiter = '/';
$regexp = trim($regexp);
if (strpos($regexp, $delimiter) !== 0) {
// Make sure delimiters are in place.
$regexp = $delimiter . $regexp . $delimiter;
}
return $regexp;
},
$filteredBotRegexps
);
$cache->setEntireCache($botRegexps);
return $botRegexps;
}
/**
* Get passed variable value inside the passed url.
*
* @param string $url
* @param string $varName
* @param array $userVars
*
* @return string|null
*/
private static function _getUserVar($url, $varName, $userVars = [])
{
parse_str((string) parse_url($url, PHP_URL_QUERY), $userVarsFromUrl);
return $userVarsFromUrl[$varName] ?? $userVars[$varName] ?? null;
}
/**
* Get url components (page, operation and args)
* based on the passed offset.
*
* @param string $urlInfo
* @param string $isPathInfo
* @param int $offset
* @param string $varName
* @param array $userVars (optional) GET variables
* (only for testing).
*
* @return mixed array|string|null
*/
private static function _getUrlComponents($urlInfo, $isPathInfo, $offset, $varName = '', $userVars = [])
{
$component = null;
$isArrayComponent = false;
if ($varName == 'path') {
$isArrayComponent = true;
}
$vars = explode('/', trim($urlInfo ?? '', '/'));
if (count($vars) > $offset + 1) {
if ($isArrayComponent) {
$component = array_slice($vars, $offset + 1);
} else {
$component = $vars[$offset + 1];
}
}
if ($isArrayComponent) {
if (empty($component)) {
$component = [];
} elseif (!is_array($component)) {
$component = [$component];
}
}
return $component;
}
/**
* Extract the class name from the given file path.
*
* @param SplFileInfo $file info about a file extract class name from
*
* @return string fully qualified class name
*
* @see Finder
*/
public static function classFromFile(SplFileInfo $file): string
{
$libPath = realpath(base_path(PKP_LIB_PATH));
$isLib = str_starts_with($file->getRealPath(), $libPath);
$className = str_replace($isLib ? $libPath : realpath(base_path()), '', $file->getRealPath());
// Drop the "classes" from the path (we don't use it on the namespaces) and the extension
$className = preg_replace('#^[\\\\/]classes|\.php$#', '', $className);
// Include the base namespace and replace the directory separator by the namespace separator
$className = str_replace('/', '\\', '/' . ($isLib ? 'PKP' : 'APP') . $className);
return class_exists($className)
? $className
: throw new Exception("Failed to map the file \"{$file->getRealPath()}\" to a full qualified class name");
}
}
|