mirror of https://github.com/alibaba/MNN.git
273 lines
11 KiB
C++
273 lines
11 KiB
C++
//
|
|
// dataLoaderTest.cpp
|
|
// MNN
|
|
//
|
|
// Created by MNN on 2019/11/20.
|
|
// Copyright © 2018, Alibaba Group Holding Limited
|
|
//
|
|
|
|
#include <MNN/expr/ExprCreator.hpp>
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include "DataLoader.hpp"
|
|
#include "DemoUnit.hpp"
|
|
#include "MnistDataset.hpp"
|
|
#include "LambdaTransform.hpp"
|
|
#include "RandomSampler.hpp"
|
|
#include "Sampler.hpp"
|
|
#include "StackTransform.hpp"
|
|
#include "Transform.hpp"
|
|
#include "TransformDataset.hpp"
|
|
|
|
using namespace std;
|
|
using namespace MNN::Train;
|
|
using namespace MNN;
|
|
|
|
class DataLoaderTest : public DemoUnit {
|
|
public:
|
|
// this function is an example to use the lambda transform
|
|
// here we use lambda transform to normalize data from 0~255 to 0~1
|
|
static Example func(Example example) {
|
|
// an easier way to do this
|
|
auto cast = _Cast(example.first[0], halide_type_of<float>());
|
|
return {{_Multiply(cast, _Const(1.0f / 255.0f)), example.first[1]}, {example.second}};
|
|
}
|
|
|
|
virtual int run(int argc, const char* argv[]) override {
|
|
if (argc != 2) {
|
|
cout << "usage: ./runTrainDemo.out DataLoaderTest /path/to/unzipped/mnist/data/" << endl;
|
|
return 0;
|
|
}
|
|
|
|
const int testCount = 6;
|
|
int passedTestCount = 0;
|
|
|
|
std::string root = argv[1];
|
|
|
|
// train data loader
|
|
const size_t trainDatasetSize = 60000;
|
|
auto trainDataset = MnistDataset::create(root, MnistDataset::Mode::TRAIN);
|
|
|
|
auto trainSampler = std::make_shared<RandomSampler>(trainDataset.get<MnistDataset>()->size());
|
|
|
|
const size_t trainBatchSize = 7;
|
|
const size_t trainNumWorkers = 4;
|
|
auto trainConfig = std::make_shared<DataLoaderConfig>(trainBatchSize, trainNumWorkers);
|
|
|
|
DataLoader trainDataLoader(trainDataset.mDataset, trainSampler, trainConfig);
|
|
|
|
auto images = trainDataset.get<MnistDataset>()->images();
|
|
auto labels = trainDataset.get<MnistDataset>()->labels();
|
|
const int32_t kImageRows = 28;
|
|
const int32_t kImageColumns = 28;
|
|
|
|
const size_t iterations = trainDatasetSize / trainBatchSize;
|
|
|
|
auto samplerIndices = trainSampler->indices();
|
|
sort(samplerIndices.begin(), samplerIndices.end());
|
|
for (int i = 0; i < samplerIndices.size(); i++) {
|
|
MNN_ASSERT(samplerIndices[i] == i);
|
|
}
|
|
|
|
for (int i = 0; i < iterations; i++) {
|
|
auto trainData = trainDataLoader.next();
|
|
|
|
for (int j = 0; j < trainData.size(); j++) {
|
|
auto index = int(trainData[j].first[1]->readMap<float>()[0]);
|
|
|
|
auto data = trainData[j].first[0]->readMap<uint8_t>();
|
|
auto label = trainData[j].second[0]->readMap<uint8_t>();
|
|
|
|
auto trueData = images->readMap<uint8_t>() + kImageRows * kImageColumns * index;
|
|
auto trueLabel = labels->readMap<uint8_t>() + index;
|
|
|
|
for (int k = 0; k < kImageRows * kImageColumns; k++) {
|
|
MNN_ASSERT(data[k] == trueData[k]);
|
|
}
|
|
MNN_ASSERT(label[0] == trueLabel[0]);
|
|
}
|
|
}
|
|
trainDataLoader.clean();
|
|
|
|
passedTestCount++;
|
|
cout << "[" << passedTestCount << " / " << testCount << "] passed." << endl;
|
|
|
|
// the lambda transform for one example, we also can do it in batch
|
|
auto trainLambdaTransform = std::make_shared<LambdaTransform>(func);
|
|
auto trainLambdaTransDataset = std::make_shared<BatchTransformDataset>(trainDataset.mDataset, trainLambdaTransform);
|
|
|
|
DataLoader trainLambdaDataLoader(trainLambdaTransDataset, trainSampler, trainConfig);
|
|
|
|
samplerIndices = trainSampler->indices();
|
|
sort(samplerIndices.begin(), samplerIndices.end());
|
|
for (int i = 0; i < samplerIndices.size(); i++) {
|
|
MNN_ASSERT(samplerIndices[i] == i);
|
|
}
|
|
|
|
for (int i = 0; i < iterations; i++) {
|
|
auto trainData = trainLambdaDataLoader.next();
|
|
|
|
for (int j = 0; j < trainData.size(); j++) {
|
|
auto index = int(trainData[j].first[1]->readMap<float>()[0]);
|
|
|
|
auto data = trainData[j].first[0]->readMap<float>();
|
|
auto label = trainData[j].second[0]->readMap<uint8_t>();
|
|
|
|
auto trueData = images->readMap<uint8_t>() + kImageRows * kImageColumns * index;
|
|
auto trueLabel = labels->readMap<uint8_t>() + index;
|
|
|
|
for (int k = 0; k < kImageRows * kImageColumns; k++) {
|
|
MNN_ASSERT(fabs(data[k] - (trueData[k] / 255.0f)) < 1e-6);
|
|
}
|
|
MNN_ASSERT(label[0] == trueLabel[0]);
|
|
}
|
|
}
|
|
trainLambdaDataLoader.clean();
|
|
|
|
passedTestCount++;
|
|
cout << "[" << passedTestCount << " / " << testCount << "] passed." << endl;
|
|
|
|
// the stack transform, stack [1, 28, 28] to [n, 1, 28, 28]
|
|
auto trainStackTransform = std::make_shared<StackTransform>();
|
|
auto trainStackTransDataset = std::make_shared<BatchTransformDataset>(trainDataset.mDataset, trainStackTransform);
|
|
|
|
DataLoader trainStackDataLoader(trainStackTransDataset, trainSampler, trainConfig);
|
|
|
|
samplerIndices = trainSampler->indices();
|
|
sort(samplerIndices.begin(), samplerIndices.end());
|
|
for (int i = 0; i < samplerIndices.size(); i++) {
|
|
MNN_ASSERT(samplerIndices[i] == i);
|
|
}
|
|
|
|
for (int i = 0; i < iterations; i++) {
|
|
auto trainData = trainStackDataLoader.next();
|
|
|
|
auto data = trainData[0].first[0]->readMap<uint8_t>();
|
|
auto label = trainData[0].second[0]->readMap<uint8_t>();
|
|
|
|
for (int j = 0; j < trainBatchSize; j++) {
|
|
auto index = int(trainData[0].first[1]->readMap<float>()[j]);
|
|
|
|
auto trueData = images->readMap<uint8_t>() + kImageRows * kImageColumns * index;
|
|
auto trueLabel = labels->readMap<uint8_t>() + index;
|
|
|
|
for (int k = 0; k < kImageRows * kImageColumns; k++) {
|
|
int dataIndex = j * (kImageRows * kImageColumns) + k;
|
|
MNN_ASSERT(data[dataIndex] == trueData[k]);
|
|
}
|
|
MNN_ASSERT(label[j] == trueLabel[0]);
|
|
}
|
|
}
|
|
trainStackDataLoader.clean();
|
|
|
|
passedTestCount++;
|
|
cout << "[" << passedTestCount << " / " << testCount << "] passed." << endl;
|
|
|
|
// here we test Lambda + Stack
|
|
auto trainLambdaStackTransDataset =
|
|
std::make_shared<BatchTransformDataset>(trainLambdaTransDataset, trainStackTransform);
|
|
|
|
DataLoader trainLambdaStackDataLoader(trainLambdaStackTransDataset, trainSampler, trainConfig);
|
|
|
|
samplerIndices = trainSampler->indices();
|
|
sort(samplerIndices.begin(), samplerIndices.end());
|
|
for (int i = 0; i < samplerIndices.size(); i++) {
|
|
MNN_ASSERT(samplerIndices[i] == i);
|
|
}
|
|
|
|
for (int i = 0; i < iterations; i++) {
|
|
auto trainData = trainLambdaStackDataLoader.next();
|
|
|
|
auto data = trainData[0].first[0]->readMap<float>();
|
|
auto label = trainData[0].second[0]->readMap<uint8_t>();
|
|
|
|
for (int j = 0; j < trainBatchSize; j++) {
|
|
auto index = int(trainData[0].first[1]->readMap<float>()[j]);
|
|
|
|
auto trueData = images->readMap<uint8_t>() + kImageRows * kImageColumns * index;
|
|
auto trueLabel = labels->readMap<uint8_t>() + index;
|
|
|
|
for (int k = 0; k < kImageRows * kImageColumns; k++) {
|
|
int dataIndex = j * (kImageRows * kImageColumns) + k;
|
|
MNN_ASSERT(fabs(data[dataIndex] - (trueData[k] / 255.0f)) < 1e-6);
|
|
}
|
|
MNN_ASSERT(label[j] == trueLabel[0]);
|
|
}
|
|
}
|
|
trainLambdaStackDataLoader.clean();
|
|
|
|
passedTestCount++;
|
|
cout << "[" << passedTestCount << " / " << testCount << "] passed." << endl;
|
|
|
|
// here we test Stack + Lambda
|
|
auto trainStackLambdaTransDataset =
|
|
std::make_shared<BatchTransformDataset>(trainStackTransDataset, trainLambdaTransform);
|
|
|
|
DataLoader trainStackLamdaDataLoader(trainStackLambdaTransDataset, trainSampler, trainConfig);
|
|
|
|
samplerIndices = trainSampler->indices();
|
|
sort(samplerIndices.begin(), samplerIndices.end());
|
|
for (int i = 0; i < samplerIndices.size(); i++) {
|
|
MNN_ASSERT(samplerIndices[i] == i);
|
|
}
|
|
|
|
for (int i = 0; i < iterations; i++) {
|
|
auto trainData = trainStackLamdaDataLoader.next();
|
|
|
|
auto data = trainData[0].first[0]->readMap<float>();
|
|
auto label = trainData[0].second[0]->readMap<uint8_t>();
|
|
|
|
for (int j = 0; j < trainBatchSize; j++) {
|
|
auto index = int(trainData[0].first[1]->readMap<float>()[j]);
|
|
|
|
auto trueData = images->readMap<uint8_t>() + kImageRows * kImageColumns * index;
|
|
auto trueLabel = labels->readMap<uint8_t>() + index;
|
|
|
|
for (int k = 0; k < kImageRows * kImageColumns; k++) {
|
|
int dataIndex = j * (kImageRows * kImageColumns) + k;
|
|
MNN_ASSERT(fabs(data[dataIndex] - (trueData[k] / 255.0f)) < 1e-6);
|
|
}
|
|
MNN_ASSERT(label[j] == trueLabel[0]);
|
|
}
|
|
}
|
|
trainStackLamdaDataLoader.clean();
|
|
|
|
passedTestCount++;
|
|
cout << "[" << passedTestCount << " / " << testCount << "] passed." << endl;
|
|
|
|
// test makeDataLoader
|
|
auto madeDataLoader = std::shared_ptr<DataLoader>(DataLoader::makeDataLoader(
|
|
trainDataset.mDataset, {nullptr, trainStackTransform, nullptr, trainLambdaTransform, nullptr}, 7));
|
|
|
|
for (int i = 0; i < iterations; i++) {
|
|
auto trainData = madeDataLoader->next();
|
|
|
|
auto data = trainData[0].first[0]->readMap<float>();
|
|
auto label = trainData[0].second[0]->readMap<uint8_t>();
|
|
|
|
for (int j = 0; j < trainBatchSize; j++) {
|
|
auto index = int(trainData[0].first[1]->readMap<float>()[j]);
|
|
|
|
auto trueData = images->readMap<uint8_t>() + kImageRows * kImageColumns * index;
|
|
auto trueLabel = labels->readMap<uint8_t>() + index;
|
|
|
|
for (int k = 0; k < kImageRows * kImageColumns; k++) {
|
|
int dataIndex = j * (kImageRows * kImageColumns) + k;
|
|
MNN_ASSERT(fabs(data[dataIndex] - (trueData[k] / 255.0f)) < 1e-6);
|
|
}
|
|
MNN_ASSERT(label[j] == trueLabel[0]);
|
|
}
|
|
}
|
|
madeDataLoader->clean();
|
|
|
|
passedTestCount++;
|
|
cout << "[" << passedTestCount << " / " << testCount << "] passed." << endl;
|
|
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
// Registers DataLoaderTest under the name "DataLoaderTest".
// NOTE(review): presumably this is the name the demo runner looks up from its
// first command-line argument (see the usage string printed by run()) --
// confirm against the DemoUnitSetRegister definition in DemoUnit.hpp.
DemoUnitSetRegister(DataLoaderTest, "DataLoaderTest");
|