Mugo Web main content.

Re-building a corrupt eZ Publish cluster table

By: Peter Keung | July 4, 2013 | eZ Publish development tips

We were called to investigate a clustered eZ Publish 4.5 (with eZ DFS) site where most of the images were suddenly missing. As we discovered, due to an erroneous MySQL command, the "ezdfsfile" table had been truncated. It was near the end of the day, and the most recent viable database backup was from the previous night. We either had to somehow rebuild the "ezdfsfile" table or ask many editors across different departments to re-do all their work from that day. It was a rather panicked situation.

Thankfully, the binary file references in the "ezdfsfile" table are still available in the standard eZ Publish tables "ezbinaryfile", "ezmedia", and "ezimagefile". Therefore, we were able to make a quick hack on the existing bin/php/clusterize.php script (normally used for the initial setup of eZ DFS) to create a clusterize_restore.php script that rebuilds the "ezdfsfile" table. It came in handy the following day as well since the same bad MySQL command was run again, causing more panic!

With a few disclaimers, we present the script below:

  • This is a script to repair a very specific use case, where only ezdfsfile was corrupted
  • Due to the above, no attempt has been made to polish this script other than adding the comment block at the top
  • It does not consider any cache file references
  • At the time, the "ezdfsfile" table was in the same database as the other eZ Publish tables; the script needs to be modified if "ezdfsfile" has its own database
#!/usr/bin/env php
<?php
/*
 * Copy of bin/php/clusterize.php that re-fills a corrupted ezdfsfile table
 * Be sure to configure the path to your NFS share, the database parameters, and the ezdfsfile table
 * It presumes that the ezdfsfile table is in the same database as the rest of eZ Publish
 * You will have to make a separate database connection if ezdfsfile has its own database
 * Run like this: php clusterize_restore.php
*/
// Prefix (including the trailing slash) that is prepended to every file path
// when the script reads file metadata from the NFS share.
define( 'MOUNT_POINT_PATH', 'path-to-nfs-share/' );

// E_ALL already includes E_NOTICE.
error_reporting( E_ALL );

require 'autoload.php';

// Connection parameters of the database that holds the ezdfsfile table.
$dbparams = array(
    'host' => 'dbhost',
    'user' => 'dbuser',
    'pass' => 'dbpass',
    'name' => 'dbname'
);

// Raw MySQL connection used by query(), insertUpdate() and quote().
// Abort right away when it cannot be established: otherwise every later
// INSERT would fail one by one with a far less obvious error.
$db = mysql_connect( $dbparams['host'], $dbparams['user'], $dbparams['pass'] );
if ( $db === false )
{
    fwrite( STDERR, "Could not connect to MySQL host '" . $dbparams['host'] . "': " . mysql_error() . "\n" );
    exit( 1 );
}

if ( !mysql_select_db( $dbparams['name'], $db ) )
{
    fwrite( STDERR, "Could not select database '" . $dbparams['name'] . "': " . mysql_error( $db ) . "\n" );
    exit( 1 );
}

/**
 * Builds the storage path of an original binary or media file.
 *
 * The logic is taken from eZBinaryFile::storedFileInfo(): the path is
 * "<storage directory>/original/<mime group>/<file name>", where the mime
 * group is the part of the mime type in front of the slash.
 *
 * @param string $fileName Stored file name as read from the database row.
 * @param string $mimeType Mime type such as "application/pdf".
 * @return string Path built on top of eZSys::storageDirectory().
 */
function filePathForBinaryFile($fileName, $mimeType )
{
    // Only the group is needed. Indexing the first element (instead of
    // list()-unpacking two) avoids an undefined-offset notice when the
    // mime type contains no slash; such a value is used as the group as-is.
    $mimeParts = explode( '/', $mimeType, 2 );
    $group = $mimeParts[0];

    return eZSys::storageDirectory() . '/original/' . $group . '/' . $fileName;
}

/**
 * Re-creates the ezdfsfile rows for every file listed in ezbinaryfile.
 *
 * Each row is stored with datatype "misc" and scope "binaryfile".
 *
 * @param bool $remove Kept for signature compatibility; not used here.
 */
function copyBinaryfilesToDB( $remove )
{
    global $cli;

    $database = eZDB::instance();

    $cli->output( "Importing binary files to database:");
    $binaryFiles = $database->arrayQuery('select filename, mime_type from ezbinaryfile' );

    foreach ( $binaryFiles as $binaryFile )
    {
        $storagePath = filePathForBinaryFile( $binaryFile['filename'], $binaryFile['mime_type'] );
        $cli->output( "- " . $storagePath );
        // Only the metadata row is written; the file itself is left untouched.
        storeInner( $storagePath, 'misc', 'binaryfile' );
    }

    $cli->output();
}

/**
 * Re-creates the ezdfsfile rows for every file listed in ezmedia.
 *
 * Each row is stored with datatype "misc" and scope "mediafile".
 *
 * @param bool $remove Kept for signature compatibility; not used here.
 */
function copyMediafilesToDB( $remove )
{
    global $cli;

    $database = eZDB::instance();

    $cli->output( "Importing media files to database:");
    $mediaFiles = $database->arrayQuery('select filename, mime_type from ezmedia' );

    foreach ( $mediaFiles as $mediaFile )
    {
        $storagePath = filePathForBinaryFile( $mediaFile['filename'], $mediaFile['mime_type'] );
        $cli->output( "- " . $storagePath );
        // Only the metadata row is written; the file itself is left untouched.
        storeInner( $storagePath, 'misc', 'mediafile' );
    }

    $cli->output();
}

/**
 * Re-creates the ezdfsfile rows for every path listed in ezimagefile.
 *
 * The datatype of each row is the mime name detected from the file found
 * under the mount point; the scope is always "image".
 *
 * @param bool $remove Kept for signature compatibility; not used here.
 */
function copyImagesToDB( $remove )
{
    global $cli;

    $database = eZDB::instance();

    $cli->output( "Importing images and imagealiases files to database:");
    $imageRows = $database->arrayQuery('select filepath from ezimagefile' );

    foreach ( $imageRows as $imageRow )
    {
        $imagePath = $imageRow['filepath'];
        $cli->output( "- " . $imagePath );

        // Mime detection reads the file on the share, hence the mount prefix.
        $mimeInfo = eZMimeType::findByFileContents( prependMountPath( $imagePath ) );
        storeInner( $imagePath, $mimeInfo['name'], 'image' );
    }
}

/**
 * Fetches every file known to the cluster file handler back to the local
 * file system (the "unclusterize" direction).
 *
 * @param array $excludeScopes Scopes passed to the handler's getFileList() as exclusions.
 * @param bool  $remove        When true, each file is deleted through the handler after fetching.
 */
function copyFilesFromDB( $excludeScopes, $remove )
{
    global $cli, $fileHandler;

    $cli->output( "Exporting files from database:");

    foreach ( $fileHandler->getFileList( $excludeScopes, true ) as $clusterPath )
    {
        $cli->output( "- " . $clusterPath );

        // Make sure the target directory exists before fetching into it.
        $targetDir = dirname( $clusterPath );
        eZDir::mkdir( $targetDir, false, true );
        $fileHandler->fileFetch( $clusterPath );

        if ( !$remove )
        {
            continue;
        }
        $fileHandler->fileDelete( $clusterPath );
    }

    $cli->output();
}

/**
 * Prefixes a file path with the configured MOUNT_POINT_PATH.
 *
 * @param string $filename Path to prefix.
 * @return string MOUNT_POINT_PATH immediately followed by $filename.
 */
function prependMountPath( $filename )
{
    return MOUNT_POINT_PATH . $filename;
}

/**
 * Inserts (or refreshes) the ezdfsfile metadata row of a single file.
 *
 * Size and modification time are read from the file below MOUNT_POINT_PATH.
 * Only metadata is written: the copy-to-DFS step of the code this function
 * was adapted from is intentionally left out.
 *
 * @param string      $filePath Path of the file as stored in the "name" column.
 * @param string      $datatype Value for the "datatype" column.
 * @param string      $scope    Value for the "scope" column (e.g. "image").
 * @param string|bool $fname    Label forwarded to insertUpdate() for error reporting.
 * @return bool False when the file is missing on the share and was skipped,
 *              true otherwise (also when the insert itself failed and was
 *              only reported).
 */
function storeInner( $filePath, $datatype, $scope, $fname = false )
{
    global $cli;

    $nfsFilePath = prependMountPath( $filePath );

    // Drop cached stat results so the values below reflect the current file.
    clearstatcache();

    // A file that is referenced in the database but absent on the share would
    // make filemtime()/filesize() return false and produce a bogus row with
    // empty size and mtime. Skip it instead.
    if ( !is_file( $nfsFilePath ) )
    {
        $cli->output( "Skipping missing file: " . $nfsFilePath );
        return false;
    }

    $fileMTime = filemtime( $nfsFilePath );
    $contentLength = filesize( $nfsFilePath );
    $filePathHash = md5( $filePath );
    $nameTrunk = nameTrunk( $filePath, $scope );

    $row = array( 'datatype' => $datatype,
                  'name' => $filePath,
                  'name_trunk' => $nameTrunk,
                  'name_hash' => $filePathHash,
                  'scope' => $scope,
                  'size' => $contentLength,
                  'mtime' => $fileMTime,
                  // A negative mtime marks the row as expired.
                  'expired' => ( $fileMTime < 0 ) ? 1 : 0 );

    // On a duplicate key the existing row is refreshed instead of failing.
    $onDuplicate = "datatype=VALUES(datatype), scope=VALUES(scope), size=VALUES(size), mtime=VALUES(mtime), expired=VALUES(expired)";

    if ( insertUpdate( 'ezdfsfile', $row, $onDuplicate, $fname ) === false )
    {
        $cli->output( "Failed to insert file metadata while storing. Possible race condition" );
    }

    return true;
}

/**
 * Computes the "name trunk" of a file path for the given scope.
 *
 * - "viewcache": the path up to and including its last "-".
 * - "template-block": for paths containing "subtree/" below the template
 *   block cache directory, the path up to and including "cache/";
 *   otherwise the full path.
 * - any other scope: the full path.
 *
 * This script only uses scopes that fall into the last case, so the two
 * special cases are probably never hit here.
 *
 * @param string $filePath File path to derive the trunk from.
 * @param string $scope    Cluster scope of the file.
 * @return string
 */
function nameTrunk( $filePath, $scope )
{
    // Loose comparison on purpose: it matches what switch() does.
    if ( $scope == 'viewcache' )
    {
        $lastDash = strrpos( $filePath, '-' );
        return substr( $filePath, 0, $lastDash + 1 );
    }

    if ( $scope == 'template-block' )
    {
        $cacheDir = eZTemplateCacheBlock::templateBlockCacheDir();
        $relativePath = str_replace( $cacheDir, '', $filePath );

        if ( strstr( $relativePath, 'subtree/' ) === false )
        {
            return $filePath;
        }

        // 6 = strlen( 'cache/' );
        $trunkLength = strlen( $cacheDir ) + strpos( $relativePath, 'cache/' ) + 6;
        return substr( $filePath, 0, $trunkLength );
    }

    return $filePath;
}

/**
 * Runs an "INSERT ... ON DUPLICATE KEY UPDATE" statement.
 *
 * @param string      $table       Table to insert into.
 * @param array       $array       Column name => value map; values are quoted via sqlList().
 * @param string      $update      Raw SQL placed after "ON DUPLICATE KEY UPDATE".
 * @param string|bool $fname       Label forwarded to query() for error reporting.
 * @param bool        $reportError Whether query() should print a failure.
 * @return int|bool False when the query failed, otherwise the result of
 *                  mysql_insert_id() on the global connection.
 */
function insertUpdate( $table, $array, $update, $fname, $reportError = true )
{
    global $db;

    $columnList = implode( ", ", array_keys( $array ) );
    $valueList = sqlList( $array );
    $sql = "INSERT INTO $table ($columnList) VALUES ($valueList)\nON DUPLICATE KEY UPDATE $update";

    if ( !query( $sql, $fname, $reportError ) )
    {
        // @todo Throw an exception
        return false;
    }

    return mysql_insert_id( $db );
}

/**
 * Prints and executes a SQL statement on the global MySQL connection.
 *
 * @param string      $query       SQL to execute.
 * @param string|bool $fname       Label printed next to a failed query.
 * @param bool        $reportError Whether a failure is printed.
 * @return mixed Whatever mysql_query() returned (false on failure).
 */
function query( $query, $fname = false, $reportError = true )
{
    global $cli, $db;

    // Every statement is echoed before it is run.
    $cli->output( 'QUERY: ' . $query );

    $result = mysql_query( $query, $db );
    if ( $result )
    {
        return $result;
    }

    if ( $reportError )
    {
        $cli->output( 'Database error with query: ' . $query );
        $cli->output( 'File name: ' . $fname );
    }

    return $result;
}

/**
 * Turns a list of values into a comma-separated list of SQL literals.
 *
 * @param array $array Values to quote; keys are ignored.
 * @return string Quoted values joined with ", " (empty string for an empty array).
 */
function sqlList( $array )
{
    $quotedValues = array();
    foreach ( $array as $value )
    {
        $quotedValues[] = quote( $value );
    }

    return implode( ", ", $quotedValues );
}
/**
 * Converts a PHP value into a SQL literal.
 *
 * @param mixed $value Value to convert.
 * @return string "NULL" for null, the bare digits for integers, and a
 *                single-quoted, escaped string for everything else.
 */
function quote( $value )
{
    if ( $value === null )
    {
        return 'NULL';
    }

    if ( is_int( $value ) )
    {
        return (string)$value;
    }

    return "'" . mysql_real_escape_string( $value ) . "'";
}

// Console output helper; the functions above reach it through "global $cli".
$cli = eZCLI::instance();
// Script wrapper. The description text is inherited from clusterize.php.
$script = eZScript::instance( array( 'description' => ( "eZ Publish (un)clusterize\n" .
                                                        "Script for moving var_dir files from " .
                                                        "filesystem to database and vice versa\n" .
                                                        "\n" .
                                                        "./clusterize_restore.php" ),
                                     'use-session'    => false,
                                     'use-modules'    => false,
                                     'use-extensions' => true ) );

// NOTE(review): startup(), getOptions() and initialize() are kept in the
// order inherited from clusterize.php; presumably the order matters.
$script->startup();

$options = $script->getOptions( "[u][skip-binary-files][skip-media-files][skip-images][r][n]",
                                "",
                                array( 'u'                 => 'Unclusterize',
                                       'skip-binary-files' => 'Skip copying binary files',
                                       'skip-media-files'  => 'Skip copying media files',
                                       'skip-images'       => 'Skip copying images',
                                       'r'                 => 'Remove files after copying',
                                       'n'                 => 'Do not wait' ) );

$script->initialize();

// Translate the command line switches into flags. Everything is enabled by
// default; "u" switches direction and the "skip-*" options disable one group.
$clusterize = !isset( $options['u'] );
$remove     =  isset( $options['r'] );
$copyFiles  = !isset( $options['skip-binary-files'] );
$copyMedia  = !isset( $options['skip-media-files'] );
$copyImages = !isset( $options['skip-images'] );
$wait       = !isset( $options['n'] );

// DO NOT REMOVE THE ORIGINAL FILE NO MATTER WHAT IS SENT
// (this makes the "r" option a no-op for this script)
$remove = false;

// Unless "n" was passed, announce what is about to happen and give the
// operator ten seconds to abort.
if ( $wait )
{
    $warningMsg = "This script will now "
                . ( $remove ? "move" : "copy" )
                . " your files and/or images "
                . ( $clusterize ? 'to' : 'from' )
                . " database.";
    $cli->warning( $warningMsg );
    $cli->warning( "You have 10 seconds to break the script (press Ctrl-C)." );
    sleep( 10 );
}

// Cluster file handler configured for this installation; copyFilesFromDB()
// reaches it through "global $fileHandler".
$fileHandler = eZClusterFileHandler::instance();
if ( !is_object( $fileHandler ) )
{
    // No usable handler: report it and shut down with exit code 1.
    // NOTE(review): presumably shutdown() with an exit code ends the script
    // here - confirm against eZScript.
    $cli->error( "Clustering settings specified incorrectly or the chosen file handler is ezfs." );
    $script->shutdown( 1 );
}
// the script will only run if clusterizing is supported by the currently
// configured handler
elseif ( !$fileHandler->requiresClusterizing() )
{
    // Nothing to do for this handler; this is not treated as an error (exit code 0).
    $message = "The current cluster handler (" . get_class( $fileHandler ) . ") " .
               "doesn't require/support running this script";
    $cli->output( $message );
    $script->shutdown( 0 );
}

if ( !$clusterize )
{
    // unclusterize, from cluster => FS
    // Every group that was skipped on the command line becomes an excluded scope.
    $scopeSwitches = array( 'binaryfile' => $copyFiles,
                            'image'      => $copyImages,
                            'mediafile'  => $copyMedia );

    $excludeScopes = array();
    foreach ( $scopeSwitches as $scopeName => $scopeEnabled )
    {
        if ( !$scopeEnabled )
        {
            $excludeScopes[] = $scopeName;
        }
    }

    copyFilesFromDB( $excludeScopes, $remove );
}
else
{
    // clusterize, from FS => cluster: rebuild the rows group by group
    if ( $copyFiles )
        copyBinaryfilesToDB( $remove );

    if ( $copyImages )
        copyImagesToDB( $remove );

    if ( $copyMedia )
        copyMediafilesToDB( $remove );
}

$script->shutdown();
?>