<?php
/**
* @file classes/task/FileLoader.php
*
* Copyright (c) 2014-2021 Simon Fraser University
* Copyright (c) 2003-2021 John Willinsky
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
*
* @class FileLoader
*
* @ingroup classes_task
*
* @brief Base scheduled task class for reliably processing files.
*/
namespace PKP\task;
use Exception;
use PKP\config\Config;
use PKP\db\DAORegistry;
use PKP\file\FileManager;
use PKP\scheduledTask\ScheduledTask;
use PKP\scheduledTask\ScheduledTaskHelper;
use PKP\site\Site;
use PKP\site\SiteDAO;
abstract class FileLoader extends ScheduledTask
{
    /** processFile() result: move the claimed file back to the staging folder. */
    public const FILE_LOADER_RETURN_TO_STAGING = 1;
    /** processFile() result: move the claimed file to the dispatch folder. */
    public const FILE_LOADER_RETURN_TO_DISPATCH = 2;

    public const FILE_LOADER_ERROR_MESSAGE_TYPE = 'common.error';
    public const FILE_LOADER_WARNING_MESSAGE_TYPE = 'common.warning';

    // Names of the folders that are expected below the base path.
    public const FILE_LOADER_PATH_STAGING = 'stage';
    public const FILE_LOADER_PATH_PROCESSING = 'processing';
    public const FILE_LOADER_PATH_REJECT = 'reject';
    public const FILE_LOADER_PATH_ARCHIVE = 'archive';
    public const FILE_LOADER_PATH_DISPATCH = 'dispatch';

    /** The current claimed filename that the script is working on. */
    private string $_claimedFilename;

    /** Canonicalized base directory path, or null when it could not be resolved. */
    private ?string $_basePath = null;

    /** Stage directory path (empty string while no valid base path is known). */
    private string $_stagePath = '';

    /** Processing directory path (empty string while no valid base path is known). */
    private string $_processingPath = '';

    /** Archive directory path (empty string while no valid base path is known). */
    private string $_archivePath = '';

    /** Dispatch directory path (empty string while no valid base path is known). */
    private string $_dispatchPath = '';

    /** Reject directory path (empty string while no valid base path is known). */
    private string $_rejectPath = '';

    /** Admin email. */
    private string $_adminEmail;

    /** Admin name. */
    private string $_adminName;

    /** List of staged back files after processing. */
    private array $_stagedBackFiles = [];

    /** Whether to compress the archived files or not. */
    private bool $_compressArchives = false;

    /** List of files that should only be considered. */
    private array $_onlyConsiderFiles = [];

    /**
     * Constructor.
     *
     * @param array $args script arguments; the first entry is the base path
     *  below which the stage/processing/archive/reject/dispatch folders live.
     */
    public function __construct(array $args)
    {
        parent::__construct($args);

        // The base path stays null when it cannot be canonicalized;
        // checkFolderStructure() reports that situation to the execution log.
        $this->_basePath = $this->canonicalizeBasePath($args[0] ?? null);

        // Configure paths.
        if ($this->_basePath !== null) {
            $basePath = $this->_basePath;
            $this->_stagePath = "{$basePath}/" . self::FILE_LOADER_PATH_STAGING;
            $this->_archivePath = "{$basePath}/" . self::FILE_LOADER_PATH_ARCHIVE;
            $this->_rejectPath = "{$basePath}/" . self::FILE_LOADER_PATH_REJECT;
            $this->_processingPath = "{$basePath}/" . self::FILE_LOADER_PATH_PROCESSING;
            $this->_dispatchPath = "{$basePath}/" . self::FILE_LOADER_PATH_DISPATCH;
        }

        // Set admin email and name.
        $siteDao = DAORegistry::getDAO('SiteDAO'); /** @var SiteDAO $siteDao */
        $site = $siteDao->getSite(); /** @var Site $site */
        $this->_adminEmail = $site->getLocalizedContactEmail();
        $this->_adminName = $site->getLocalizedContactName();
    }

    //
    // Getters and setters.
    //
    /**
     * Return the staging path.
     */
    public function getStagePath(): string
    {
        return $this->_stagePath;
    }

    /**
     * Return the processing path.
     */
    public function getProcessingPath(): string
    {
        return $this->_processingPath;
    }

    /**
     * Return the reject path.
     */
    public function getRejectPath(): string
    {
        return $this->_rejectPath;
    }

    /**
     * Return the archive path.
     */
    public function getArchivePath(): string
    {
        return $this->_archivePath;
    }

    /**
     * Return the dispatch path.
     */
    public function getDispatchPath(): string
    {
        return $this->_dispatchPath;
    }

    /**
     * Return whether the archives must be compressed or not.
     */
    public function getCompressArchives(): bool
    {
        return $this->_compressArchives;
    }

    /**
     * Set whether the archives must be compressed or not.
     */
    public function setCompressArchives(bool $compressArchives): void
    {
        $this->_compressArchives = $compressArchives;
    }

    /**
     * Get the files that should only be considered.
     */
    public function getOnlyConsiderFiles(): array
    {
        return $this->_onlyConsiderFiles;
    }

    /**
     * Set the files that should only be considered.
     *
     * @param array $onlyConsiderFiles File names (as strings, without path);
     *  an empty list means that every staged file is considered.
     */
    public function setOnlyConsiderFiles(array $onlyConsiderFiles): void
    {
        $this->_onlyConsiderFiles = $onlyConsiderFiles;
    }

    //
    // Public methods
    //
    /**
     * A public helper function that can be used to ensure
     * that the file structure has actually been installed.
     *
     * @param bool $install Set this parameter to true to
     *  install the folder structure if it is missing.
     *
     * @return bool True if the folder structure exists,
     *  otherwise false.
     */
    public function checkFolderStructure(bool $install = false): bool
    {
        // Make sure that the base path is inside the private files dir.
        // The files dir has appropriate write permissions and is assumed
        // to be protected against information leak and symlink attacks.
        $configuredFilesDir = Config::getVar('files', 'files_dir');
        $filesDir = is_string($configuredFilesDir) ? realpath($configuredFilesDir) : false;

        // realpath() yields false for a missing files dir. That case must be
        // rejected explicitly, otherwise the containment check is meaningless.
        $isBasePathValid = $this->_basePath !== null
            && $filesDir !== false
            && $this->isPathInside($this->_basePath, $filesDir);

        if (!$isBasePathValid) {
            $this->addExecutionLogEntry(
                __('admin.fileLoader.wrongBasePathLocation', ['path' => $this->_basePath]),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
            );
            return false;
        }

        // Check folder presence and readability.
        $pathsToCheck = [
            $this->_stagePath,
            $this->_archivePath,
            $this->_rejectPath,
            $this->_processingPath,
            $this->_dispatchPath,
        ];
        $fileManager = null;
        foreach ($pathsToCheck as $path) {
            if (is_dir($path) && is_readable($path)) {
                continue;
            }

            if ($install) {
                // Try installing the folder if it is missing.
                $fileManager ??= new FileManager();
                $fileManager->mkdirtree($path);
            }

            // Try again.
            if (!(is_dir($path) && is_readable($path))) {
                // Give up...
                $this->addExecutionLogEntry(
                    __('admin.fileLoader.pathNotAccessible', ['path' => $path]),
                    ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
                );
                return false;
            }
        }
        return true;
    }

    //
    // Protected methods.
    //
    /**
     * @copydoc ScheduledTask::executeActions()
     */
    protected function executeActions(): bool
    {
        if (!$this->checkFolderStructure()) {
            return false;
        }

        $foundErrors = false;
        // claimNextFile() returns null once there is nothing left to claim.
        while (($filePath = $this->claimNextFile()) !== null) {
            if ($filePath === false) {
                // Problem claiming the file.
                $foundErrors = true;
                break;
            }

            try {
                $result = $this->processFile($filePath);
            } catch (Exception $e) {
                $foundErrors = true;
                $this->rejectFile();
                $this->addExecutionLogEntry($e->getMessage(), ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR);
                continue;
            }

            if ($result === self::FILE_LOADER_RETURN_TO_STAGING) {
                // Send the file back to staging
                $foundErrors = true;
                $this->stageFile();
                // Let the script know what files were sent back to staging,
                // so it doesn't claim them again thereby entering an infinite loop.
                $this->_stagedBackFiles[] = $this->_claimedFilename;
            } elseif ($result === self::FILE_LOADER_RETURN_TO_DISPATCH) {
                // Move the file to dispatch folder, where a dispatched job will find it
                $this->dispatchFile();
                $this->addExecutionLogEntry(__(
                    'admin.fileLoader.fileDispatched',
                    ['filename' => $filePath]
                ), ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_NOTICE);
            } else {
                $this->archiveFile();
            }

            if ($result === true) {
                $this->addExecutionLogEntry(__(
                    'admin.fileLoader.fileProcessed',
                    ['filename' => $filePath]
                ), ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_NOTICE);
            }
        }
        return !$foundErrors;
    }

    /**
     * Process the passed file.
     *
     * @param string $filePath Path of the claimed file inside the processing folder.
     *
     * @throws \Exception The claimed file is moved to the reject folder and
     *  the exception message is written to the execution log.
     *
     * @return bool|int self::FILE_LOADER_RETURN_TO_STAGING to send the file
     *  back to staging, self::FILE_LOADER_RETURN_TO_DISPATCH to move it to the
     *  dispatch folder. Any other value archives the file; true additionally
     *  logs the file as processed.
     *
     * @see FileLoader::executeActions() to understand the expected return values.
     *
     */
    abstract protected function processFile(string $filePath): bool|int;

    /**
     * Move file between filesystem directories.
     *
     * A failed move is logged and then passed to fatalError().
     *
     * @param string $sourceDir Directory that currently contains the file.
     * @param string $destDir Directory the file is moved to.
     * @param string $filename Name of the file, without any path.
     *
     * @return string The destination path of the moved file.
     */
    protected function moveFile(string $sourceDir, string $destDir, string $filename): string
    {
        $currentFilePath = "{$sourceDir}/{$filename}";
        $destinationPath = "{$destDir}/{$filename}";

        if (!rename($currentFilePath, $destinationPath)) {
            $message = __('admin.fileLoader.moveFileFailed', ['filename' => $filename,
                'currentFilePath' => $currentFilePath, 'destinationPath' => $destinationPath]);
            $this->addExecutionLogEntry($message, ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR);

            // Script should always stop if it can't manipulate files inside
            // its own directory system.
            fatalError($message);
        }

        return $destinationPath;
    }

    //
    // Private helper methods.
    //
    /**
     * Canonicalize the base path passed as first script argument.
     *
     * Only the parent folder has to exist already: it is resolved with
     * realpath() and the last path segment is appended unchanged.
     *
     * @param mixed $rawPath The raw script argument.
     *
     * @return ?string The canonical base path, or null if the argument is
     *  missing, empty or its parent folder cannot be resolved.
     */
    private function canonicalizeBasePath(mixed $rawPath): ?string
    {
        if (!is_string($rawPath)) {
            return null;
        }

        $trimmedPath = rtrim($rawPath, '/');
        if ($trimmedPath === '') {
            // realpath('') resolves to the current working directory, which
            // is never what an empty (or "/") argument is supposed to mean.
            return null;
        }

        // We assume that the parent folder of the base path
        // does already exist and can be canonicalized.
        $parentPath = realpath(dirname($trimmedPath));
        if ($parentPath === false) {
            return null;
        }

        return "{$parentPath}/" . basename($trimmedPath);
    }

    /**
     * Tell whether a path is the given directory or is located below it.
     *
     * The match has to end on a path separator, so that "/files_old" is not
     * considered to be inside "/files".
     */
    private function isPathInside(string $path, string $directory): bool
    {
        $directory = rtrim($directory, '/' . DIRECTORY_SEPARATOR);
        if (!str_starts_with($path, $directory)) {
            return false;
        }

        $boundary = substr($path, strlen($directory), 1);
        return $boundary === '' || $boundary === '/' || $boundary === DIRECTORY_SEPARATOR;
    }

    /**
     * Tell whether a staging folder entry may be claimed: it must not be a
     * dot entry, must not have been staged back during this run and must
     * match the "only consider" list when such a list was set.
     */
    private function isClaimable(string $filename): bool
    {
        if ($filename === '.' || $filename === '..') {
            return false;
        }
        if (in_array($filename, $this->_stagedBackFiles, true)) {
            return false;
        }
        return empty($this->_onlyConsiderFiles) || in_array($filename, $this->_onlyConsiderFiles, true);
    }

    /**
     * Claim the first file that's inside the staging folder.
     *
     * The file is moved to the processing folder; a file with the "gz"
     * extension is decompressed there before its path is returned.
     *
     * @return string|false|null The claimed file path, null if there is no
     *  file left to claim, or false if the claim was not successful.
     */
    private function claimNextFile(): string|false|null
    {
        $stageDir = opendir($this->_stagePath);
        if ($stageDir === false) {
            $this->addExecutionLogEntry(
                __('admin.fileLoader.pathNotAccessible', ['path' => $this->_stagePath]),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
            );
            return false;
        }

        $filename = null;
        try {
            // Compare against false explicitly: an entry named "0" is falsy
            // and would otherwise end the directory scan prematurely.
            while (($entry = readdir($stageDir)) !== false) {
                if ($this->isClaimable($entry)) {
                    $filename = $entry;
                    break;
                }
            }
        } finally {
            closedir($stageDir);
        }

        if ($filename === null) {
            return null;
        }

        $processingFilePath = $this->moveFile($this->_stagePath, $this->_processingPath, $filename);

        if (pathinfo($processingFilePath, PATHINFO_EXTENSION) === 'gz') {
            $fileMgr = new FileManager();
            try {
                $processingFilePath = $fileMgr->gzDecompressFile($processingFilePath);
                $filename = pathinfo($processingFilePath, PATHINFO_BASENAME);
            } catch (Exception $e) {
                // Hand the still compressed file back to the staging folder.
                $this->moveFile($this->_processingPath, $this->_stagePath, $filename);
                $this->addExecutionLogEntry($e->getMessage(), ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR);
                return false;
            }
        }

        $this->_claimedFilename = $filename;
        return $processingFilePath;
    }

    /**
     * Reject the current claimed file.
     */
    private function rejectFile(): void
    {
        $this->moveFile($this->_processingPath, $this->_rejectPath, $this->_claimedFilename);
    }

    /**
     * Move the current claimed file into the dispatch folder.
     */
    protected function dispatchFile(): void
    {
        $this->moveFile($this->_processingPath, $this->_dispatchPath, $this->_claimedFilename);
    }

    /**
     * Archive the current claimed file.
     *
     * When archive compression is enabled the archived file is gzipped; a
     * failing compression is only logged, the file stays in the archive.
     */
    private function archiveFile(): void
    {
        $this->moveFile($this->_processingPath, $this->_archivePath, $this->_claimedFilename);
        if ($this->getCompressArchives()) {
            try {
                $fileMgr = new FileManager();
                $filePath = "{$this->_archivePath}/{$this->_claimedFilename}";
                $fileMgr->gzCompressFile($filePath);
            } catch (Exception $e) {
                $this->addExecutionLogEntry($e->getMessage(), ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR);
            }
        }
    }

    /**
     * Stage the current claimed file.
     */
    private function stageFile(): void
    {
        $this->moveFile($this->_processingPath, $this->_stagePath, $this->_claimedFilename);
    }
}
if (!PKP_STRICT_MODE) {
    // Outside of strict mode the class stays reachable under its
    // un-namespaced name.
    class_alias('\PKP\task\FileLoader', '\FileLoader');

    // Publish the listed class constants as global constants of the same
    // name, reading each value through the alias registered above.
    foreach (
        [
            'FILE_LOADER_RETURN_TO_STAGING',
            'FILE_LOADER_ERROR_MESSAGE_TYPE',
            'FILE_LOADER_WARNING_MESSAGE_TYPE',
            'FILE_LOADER_PATH_STAGING',
            'FILE_LOADER_PATH_PROCESSING',
            'FILE_LOADER_PATH_REJECT',
            'FILE_LOADER_PATH_ARCHIVE',
        ] as $constantName
    ) {
        $constantValue = constant("\\FileLoader::{$constantName}");
        define($constantName, $constantValue);
        unset($constantValue);
    }
}
|