Merge pull request #965 from flowln/fat_files_in_memory

Refactor a bit EnsureMetadataTask and calculate hashes in a incremental manner
This commit is contained in:
Sefa Eyeoglu 2022-08-28 11:03:12 +02:00 committed by GitHub
commit afcd669d2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 378 additions and 183 deletions

View File

@ -494,6 +494,8 @@ set(API_SOURCES
modplatform/modrinth/ModrinthAPI.cpp
modplatform/helpers/NetworkModAPI.h
modplatform/helpers/NetworkModAPI.cpp
modplatform/helpers/HashUtils.h
modplatform/helpers/HashUtils.cpp
)
set(FTB_SOURCES

View File

@ -3,81 +3,73 @@
#include <MurmurHash2.h>
#include <QDebug>
#include "FileSystem.h"
#include "Json.h"
#include "minecraft/mod/Mod.h"
#include "minecraft/mod/tasks/LocalModUpdateTask.h"
#include "modplatform/flame/FlameAPI.h"
#include "modplatform/flame/FlameModIndex.h"
#include "modplatform/modrinth/ModrinthAPI.h"
#include "modplatform/modrinth/ModrinthPackIndex.h"
#include "net/NetJob.h"
#include "tasks/MultipleOptionsTask.h"
static ModPlatform::ProviderCapabilities ProviderCaps;
static ModrinthAPI modrinth_api;
static FlameAPI flame_api;
EnsureMetadataTask::EnsureMetadataTask(Mod* mod, QDir dir, ModPlatform::Provider prov) : Task(nullptr), m_index_dir(dir), m_provider(prov)
EnsureMetadataTask::EnsureMetadataTask(Mod* mod, QDir dir, ModPlatform::Provider prov)
: Task(nullptr), m_index_dir(dir), m_provider(prov), m_hashing_task(nullptr), m_current_task(nullptr)
{
auto hash = getHash(mod);
if (hash.isEmpty())
emitFail(mod);
else
m_mods.insert(hash, mod);
auto hash_task = createNewHash(mod);
if (!hash_task)
return;
connect(hash_task.get(), &Task::succeeded, [this, hash_task, mod] { m_mods.insert(hash_task->getResult(), mod); });
connect(hash_task.get(), &Task::failed, [this, hash_task, mod] { emitFail(mod, "", RemoveFromList::No); });
hash_task->start();
}
EnsureMetadataTask::EnsureMetadataTask(QList<Mod*>& mods, QDir dir, ModPlatform::Provider prov)
: Task(nullptr), m_index_dir(dir), m_provider(prov)
: Task(nullptr), m_index_dir(dir), m_provider(prov), m_current_task(nullptr)
{
m_hashing_task = new ConcurrentTask(this, "MakeHashesTask", 10);
for (auto* mod : mods) {
if (!mod->valid()) {
emitFail(mod);
auto hash_task = createNewHash(mod);
if (!hash_task)
continue;
}
auto hash = getHash(mod);
if (hash.isEmpty()) {
emitFail(mod);
continue;
}
m_mods.insert(hash, mod);
connect(hash_task.get(), &Task::succeeded, [this, hash_task, mod] { m_mods.insert(hash_task->getResult(), mod); });
connect(hash_task.get(), &Task::failed, [this, hash_task, mod] { emitFail(mod, "", RemoveFromList::No); });
m_hashing_task->addTask(hash_task);
}
}
QString EnsureMetadataTask::getHash(Mod* mod)
Hashing::Hasher::Ptr EnsureMetadataTask::createNewHash(Mod* mod)
{
/* Here we create a mapping hash -> mod, because we need that relationship to parse the API routes */
QByteArray jar_data;
try {
jar_data = FS::read(mod->fileinfo().absoluteFilePath());
} catch (FS::FileSystemException& e) {
qCritical() << QString("Failed to open / read JAR file of %1").arg(mod->name());
qCritical() << QString("Reason: ") << e.cause();
if (!mod || !mod->valid() || mod->type() == Mod::MOD_FOLDER)
return nullptr;
return {};
return Hashing::createHasher(mod->fileinfo().absoluteFilePath(), m_provider);
}
QString EnsureMetadataTask::getExistingHash(Mod* mod)
{
// Check for already computed hashes
// (linear on the number of mods vs. linear on the size of the mod's JAR)
auto it = m_mods.keyValueBegin();
while (it != m_mods.keyValueEnd()) {
if ((*it).second == mod)
break;
it++;
}
switch (m_provider) {
case ModPlatform::Provider::MODRINTH: {
auto hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first();
return QString(ProviderCaps.hash(ModPlatform::Provider::MODRINTH, jar_data, hash_type).toHex());
}
case ModPlatform::Provider::FLAME: {
QByteArray jar_data_treated;
for (char c : jar_data) {
// CF-specific
if (!(c == 9 || c == 10 || c == 13 || c == 32))
jar_data_treated.push_back(c);
}
return QString::number(MurmurHash2(jar_data_treated, jar_data_treated.length()));
}
// We already have the hash computed
if (it != m_mods.keyValueEnd()) {
return (*it).first;
}
// No existing hash
return {};
}
@ -127,11 +119,9 @@ void EnsureMetadataTask::executeTask()
}
auto invalidade_leftover = [this] {
QMutableHashIterator<QString, Mod*> mods_iter(m_mods);
while (mods_iter.hasNext()) {
auto mod = mods_iter.next();
emitFail(mod.value());
}
for (auto mod = m_mods.constBegin(); mod != m_mods.constEnd(); mod++)
emitFail(mod.value(), mod.key(), RemoveFromList::No);
m_mods.clear();
emitSucceeded();
};
@ -178,20 +168,44 @@ void EnsureMetadataTask::executeTask()
version_task->start();
}
void EnsureMetadataTask::emitReady(Mod* m)
void EnsureMetadataTask::emitReady(Mod* m, QString key, RemoveFromList remove)
{
if (!m) {
qCritical() << "Tried to mark a null mod as ready.";
if (!key.isEmpty())
m_mods.remove(key);
return;
}
qDebug() << QString("Generated metadata for %1").arg(m->name());
emit metadataReady(m);
m_mods.remove(getHash(m));
if (remove == RemoveFromList::Yes) {
if (key.isEmpty())
key = getExistingHash(m);
m_mods.remove(key);
}
}
void EnsureMetadataTask::emitFail(Mod* m)
void EnsureMetadataTask::emitFail(Mod* m, QString key, RemoveFromList remove)
{
if (!m) {
qCritical() << "Tried to mark a null mod as failed.";
if (!key.isEmpty())
m_mods.remove(key);
return;
}
qDebug() << QString("Failed to generate metadata for %1").arg(m->name());
emit metadataFailed(m);
m_mods.remove(getHash(m));
if (remove == RemoveFromList::Yes) {
if (key.isEmpty())
key = getExistingHash(m);
m_mods.remove(key);
}
}
// Modrinth

View File

@ -1,12 +1,14 @@
#pragma once
#include "ModIndex.h"
#include "tasks/SequentialTask.h"
#include "net/NetJob.h"
#include "modplatform/helpers/HashUtils.h"
#include "tasks/ConcurrentTask.h"
class Mod;
class QDir;
class MultipleOptionsTask;
class EnsureMetadataTask : public Task {
Q_OBJECT
@ -17,6 +19,8 @@ class EnsureMetadataTask : public Task {
~EnsureMetadataTask() = default;
Task::Ptr getHashingTask() { return m_hashing_task; }
public slots:
bool abort() override;
protected slots:
@ -31,10 +35,16 @@ class EnsureMetadataTask : public Task {
auto flameProjectsTask() -> NetJob::Ptr;
// Helpers
void emitReady(Mod*);
void emitFail(Mod*);
enum class RemoveFromList {
Yes,
No
};
void emitReady(Mod*, QString key = {}, RemoveFromList = RemoveFromList::Yes);
void emitFail(Mod*, QString key = {}, RemoveFromList = RemoveFromList::Yes);
auto getHash(Mod*) -> QString;
// Hashes and stuff
auto createNewHash(Mod*) -> Hashing::Hasher::Ptr;
auto getExistingHash(Mod*) -> QString;
private slots:
void modrinthCallback(ModPlatform::IndexedPack& pack, ModPlatform::IndexedVersion& ver, Mod*);
@ -50,5 +60,6 @@ class EnsureMetadataTask : public Task {
ModPlatform::Provider m_provider;
QHash<QString, ModPlatform::IndexedVersion> m_temp_versions;
ConcurrentTask* m_hashing_task;
NetJob* m_current_task;
};

View File

@ -19,6 +19,8 @@
#include "modplatform/ModIndex.h"
#include <QCryptographicHash>
#include <QDebug>
#include <QIODevice>
namespace ModPlatform {
@ -53,34 +55,26 @@ auto ProviderCapabilities::hashType(Provider p) -> QStringList
}
return {};
}
auto ProviderCapabilities::hash(Provider p, QByteArray& data, QString type) -> QByteArray
auto ProviderCapabilities::hash(Provider p, QIODevice* device, QString type) -> QString
{
QCryptographicHash::Algorithm algo = QCryptographicHash::Sha1;
switch (p) {
case Provider::MODRINTH: {
// NOTE: Data is the result of reading the entire JAR file!
// If 'type' was specified, we use that
if (!type.isEmpty() && hashType(p).contains(type)) {
if (type == "sha512")
return QCryptographicHash::hash(data, QCryptographicHash::Sha512);
else if (type == "sha1")
return QCryptographicHash::hash(data, QCryptographicHash::Sha1);
}
return QCryptographicHash::hash(data, QCryptographicHash::Sha512);
algo = (type == "sha1") ? QCryptographicHash::Sha1 : QCryptographicHash::Sha512;
break;
}
case Provider::FLAME:
// If 'type' was specified, we use that
if (!type.isEmpty() && hashType(p).contains(type)) {
if(type == "sha1")
return QCryptographicHash::hash(data, QCryptographicHash::Sha1);
else if (type == "md5")
return QCryptographicHash::hash(data, QCryptographicHash::Md5);
}
algo = (type == "sha1") ? QCryptographicHash::Sha1 : QCryptographicHash::Md5;
break;
}
return {};
QCryptographicHash hash(algo);
if(!hash.addData(device))
qCritical() << "Failed to read JAR to create hash!";
Q_ASSERT(hash.result().length() == hash.hashLength(algo));
return { hash.result().toHex() };
}
} // namespace ModPlatform

View File

@ -24,6 +24,8 @@
#include <QVariant>
#include <QVector>
class QIODevice;
namespace ModPlatform {
enum class Provider {
@ -36,7 +38,7 @@ class ProviderCapabilities {
auto name(Provider) -> const char*;
auto readableName(Provider) -> QString;
auto hashType(Provider) -> QStringList;
auto hash(Provider, QByteArray&, QString type = "") -> QByteArray;
auto hash(Provider, QIODevice*, QString type = "") -> QString;
};
struct ModpackAuthor {

View File

@ -0,0 +1,81 @@
#include "HashUtils.h"
#include <QDebug>
#include <QFile>
#include "FileSystem.h"
#include <MurmurHash2.h>
namespace Hashing {
static ModPlatform::ProviderCapabilities ProviderCaps;
Hasher::Ptr createHasher(QString file_path, ModPlatform::Provider provider)
{
switch (provider) {
case ModPlatform::Provider::MODRINTH:
return createModrinthHasher(file_path);
case ModPlatform::Provider::FLAME:
return createFlameHasher(file_path);
default:
qCritical() << "[Hashing]"
<< "Unrecognized mod platform!";
return nullptr;
}
}
Hasher::Ptr createModrinthHasher(QString file_path)
{
return new ModrinthHasher(file_path);
}
Hasher::Ptr createFlameHasher(QString file_path)
{
return new FlameHasher(file_path);
}
void ModrinthHasher::executeTask()
{
QFile file(m_path);
try {
file.open(QFile::ReadOnly);
} catch (FS::FileSystemException& e) {
qCritical() << QString("Failed to open JAR file in %1").arg(m_path);
qCritical() << QString("Reason: ") << e.cause();
emitFailed("Failed to open file for hashing.");
return;
}
auto hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first();
m_hash = ProviderCaps.hash(ModPlatform::Provider::MODRINTH, &file, hash_type);
file.close();
if (m_hash.isEmpty()) {
emitFailed("Empty hash!");
} else {
emitSucceeded();
}
}
void FlameHasher::executeTask()
{
// CF-specific
auto should_filter_out = [](char c) { return (c == 9 || c == 10 || c == 13 || c == 32); };
std::ifstream file_stream(m_path.toStdString(), std::ifstream::binary);
// TODO: This is very heavy work, but apparently QtConcurrent can't use move semantics, so we can't boop this to another thread.
// How do we make this non-blocking then?
m_hash = QString::number(MurmurHash2(std::move(file_stream), 4 * MiB, should_filter_out));
if (m_hash.isEmpty()) {
emitFailed("Empty hash!");
} else {
emitSucceeded();
}
}
} // namespace Hashing

View File

@ -0,0 +1,47 @@
#pragma once
#include <QString>
#include "modplatform/ModIndex.h"
#include "tasks/Task.h"
namespace Hashing {
class Hasher : public Task {
public:
using Ptr = shared_qobject_ptr<Hasher>;
Hasher(QString file_path) : m_path(std::move(file_path)) {}
/* We can't really abort this task, but we can say we aborted and finish our thing quickly :) */
bool abort() override { return true; }
void executeTask() override = 0;
QString getResult() const { return m_hash; };
QString getPath() const { return m_path; };
protected:
QString m_hash;
QString m_path;
};
class FlameHasher : public Hasher {
public:
FlameHasher(QString file_path) : Hasher(file_path) { setObjectName(QString("FlameHasher: %1").arg(file_path)); }
void executeTask() override;
};
class ModrinthHasher : public Hasher {
public:
ModrinthHasher(QString file_path) : Hasher(file_path) { setObjectName(QString("ModrinthHasher: %1").arg(file_path)); }
void executeTask() override;
};
Hasher::Ptr createHasher(QString file_path, ModPlatform::Provider provider);
Hasher::Ptr createFlameHasher(QString file_path);
Hasher::Ptr createModrinthHasher(QString file_path);
} // namespace Hashing

View File

@ -2,11 +2,14 @@
#include "ModrinthAPI.h"
#include "ModrinthPackIndex.h"
#include "FileSystem.h"
#include "Json.h"
#include "ModDownloadTask.h"
#include "modplatform/helpers/HashUtils.h"
#include "tasks/ConcurrentTask.h"
static ModrinthAPI api;
static ModPlatform::ProviderCapabilities ProviderCaps;
@ -32,6 +35,8 @@ void ModrinthCheckUpdate::executeTask()
// Create all hashes
QStringList hashes;
auto best_hash_type = ProviderCaps.hashType(ModPlatform::Provider::MODRINTH).first();
ConcurrentTask hashing_task(this, "MakeModrinthHashesTask", 10);
for (auto* mod : m_mods) {
if (!mod->enabled()) {
emit checkFailed(mod, tr("Disabled mods won't be updated, to prevent mod duplication issues!"));
@ -44,25 +49,25 @@ void ModrinthCheckUpdate::executeTask()
// need to generate a new hash if the current one is innadequate
// (though it will rarely happen, if at all)
if (mod->metadata()->hash_format != best_hash_type) {
QByteArray jar_data;
try {
jar_data = FS::read(mod->fileinfo().absoluteFilePath());
} catch (FS::FileSystemException& e) {
qCritical() << QString("Failed to open / read JAR file of %1").arg(mod->name());
qCritical() << QString("Reason: ") << e.cause();
failed(e.what());
return;
}
hash = QString(ProviderCaps.hash(ModPlatform::Provider::MODRINTH, jar_data, best_hash_type).toHex());
auto hash_task = Hashing::createModrinthHasher(mod->fileinfo().absoluteFilePath());
connect(hash_task.get(), &Task::succeeded, [&] {
QString hash (hash_task->getResult());
hashes.append(hash);
mappings.insert(hash, mod);
});
connect(hash_task.get(), &Task::failed, [this, hash_task] { failed("Failed to generate hash"); });
hashing_task.addTask(hash_task);
} else {
hashes.append(hash);
mappings.insert(hash, mod);
}
hashes.append(hash);
mappings.insert(hash, mod);
}
QEventLoop loop;
connect(&hashing_task, &Task::finished, [&loop]{ loop.quit(); });
hashing_task.start();
loop.exec();
auto* response = new QByteArray();
auto job = api.latestVersions(hashes, best_hash_type, m_game_versions, m_loaders, response);

View File

@ -1,10 +1,11 @@
#include "ConcurrentTask.h"
#include <QDebug>
#include <QCoreApplication>
ConcurrentTask::ConcurrentTask(QObject* parent, QString task_name, int max_concurrent)
: Task(parent), m_name(task_name), m_total_max_size(max_concurrent)
{}
{ setObjectName(task_name); }
ConcurrentTask::~ConcurrentTask()
{
@ -36,8 +37,9 @@ void ConcurrentTask::executeTask()
{
m_total_size = m_queue.size();
for (int i = 0; i < m_total_max_size; i++)
startNext();
for (int i = 0; i < m_total_max_size; i++) {
QMetaObject::invokeMethod(this, &ConcurrentTask::startNext, Qt::QueuedConnection);
}
}
bool ConcurrentTask::abort()
@ -91,6 +93,8 @@ void ConcurrentTask::startNext()
setStepStatus(next->isMultiStep() ? next->getStepStatus() : next->getStatus());
updateState();
QCoreApplication::processEvents();
next->start();
}

View File

@ -270,6 +270,10 @@ auto ModUpdateDialog::ensureMetadata() -> bool
connect(modrinth_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) {
onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::MODRINTH);
});
if (modrinth_task->getHashingTask())
seq.addTask(modrinth_task->getHashingTask());
seq.addTask(modrinth_task);
}
@ -279,6 +283,10 @@ auto ModUpdateDialog::ensureMetadata() -> bool
connect(flame_task, &EnsureMetadataTask::metadataFailed, [this, &should_try_others](Mod* candidate) {
onMetadataFailed(candidate, should_try_others.find(candidate->internal_id()).value(), ModPlatform::Provider::FLAME);
});
if (flame_task->getHashingTask())
seq.addTask(flame_task->getHashingTask());
seq.addTask(flame_task);
}

View File

@ -1,86 +1,110 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
//
// This was modified as to possibilitate it's usage incrementally.
// Those modifications are also placed in the public domain, and the author of
// such modifications hereby disclaims copyright to this source code.
#include "MurmurHash2.h"
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
const uint32_t m = 0x5bd1e995;
const int r = 24;
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
//-----------------------------------------------------------------------------
uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed )
uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::function<bool(char)> filter_out)
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
auto* buffer = new char[buffer_size];
char data[4];
const uint32_t m = 0x5bd1e995;
const int r = 24;
int read = 0;
uint32_t size = 0;
// Initialize the hash to a 'random' value
// We need the size without the filtered out characters before actually calculating the hash,
// to setup the initial value for the hash.
do {
file_stream.read(buffer, buffer_size);
read = file_stream.gcount();
for (int i = 0; i < read; i++) {
if (!filter_out(buffer[i]))
size += 1;
}
} while (!file_stream.eof());
uint32_t h = seed ^ len;
file_stream.clear();
file_stream.seekg(0, file_stream.beg);
// Mix 4 bytes at a time into the hash
const auto* data = (const unsigned char*) key;
while(len >= 4)
{
uint32_t k = *(uint32_t*)data;
int index = 0;
k *= m;
k ^= k >> r;
k *= m;
// This forces a seed of 1.
IncrementalHashInfo info{ (uint32_t)1 ^ size, (uint32_t)size };
do {
file_stream.read(buffer, buffer_size);
read = file_stream.gcount();
for (int i = 0; i < read; i++) {
char c = buffer[i];
h *= m;
h ^= k;
if (filter_out(c))
continue;
data += 4*sizeof(char);
len -= 4;
}
data[index] = c;
index = (index + 1) % 4;
// Handle the last few bytes of the input array
// Mix 4 bytes at a time into the hash
if (index == 0)
FourBytes_MurmurHash2((unsigned char*)&data, info);
}
} while (!file_stream.eof());
switch(len)
{
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0];
h *= m;
};
// Do one last bit shuffle in the hash
FourBytes_MurmurHash2((unsigned char*)&data, info);
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
delete[] buffer;
h ^= h >> 13;
h *= m;
h ^= h >> 15;
file_stream.close();
return info.h;
}
return h;
}
void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev)
{
if (prev.len >= 4) {
// Not the final mix
uint32_t k = *(uint32_t*)data;
k *= m;
k ^= k >> r;
k *= m;
prev.h *= m;
prev.h ^= k;
prev.len -= 4;
} else {
// The final mix
// Handle the last few bytes of the input array
switch (prev.len) {
case 3:
prev.h ^= data[2] << 16;
case 2:
prev.h ^= data[1] << 8;
case 1:
prev.h ^= data[0];
prev.h *= m;
};
// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
prev.h ^= prev.h >> 13;
prev.h *= m;
prev.h ^= prev.h >> 15;
prev.len = 0;
}
}
//-----------------------------------------------------------------------------

View File

@ -1,30 +1,33 @@
//-----------------------------------------------------------------------------
// MurmurHash2 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// The original MurmurHash2 was written by Austin Appleby, and is placed in the
// public domain. The author hereby disclaims copyright to this source code.
//
// This was modified as to possibilitate it's usage incrementally.
// Those modifications are also placed in the public domain, and the author of
// such modifications hereby disclaims copyright to this source code.
#pragma once
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
#include <cstdint>
#include <fstream>
// Microsoft Visual Studio
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
// Other compilers
#else // defined(_MSC_VER)
#include <stdint.h>
#endif // !defined(_MSC_VER)
#include <functional>
//-----------------------------------------------------------------------------
uint64_t MurmurHash2 ( const void* key, int len, uint32_t seed = 1 );
#define KiB 1024
#define MiB 1024*KiB
uint32_t MurmurHash2(
std::ifstream&& file_stream,
std::size_t buffer_size = 4*MiB,
std::function<bool(char)> filter_out = [](char) { return false; });
struct IncrementalHashInfo {
uint32_t h;
uint32_t len;
};
void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev);
//-----------------------------------------------------------------------------