본문으로 바로가기

Vector 압축

category Devlogs 2024. 3. 8. 10:07

진행중인 프로그램의 변수들을 파일로 저장하는 기능을 구현중인데..
vector의 크기가 너무 크다보니.. 이걸 파일로 그냥 저장하면 용량이.. 너무 커짐..
그래서 시간 대비 압축 효율이 좋으면 저장된 파일도 작으니.. 좋고해서..
테스트해봄.. 7z의 lzma 알고리즘 사용.

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>

#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/filter/lzma.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/lambda/lambda.hpp>
 
 
// Number of unsigned int elements generated, saved and reloaded by the benchmark.
#define DATA_COUNT 3000000
// Compression level handed to lzma_params. The benchmark runs at level 1;
// per the original note, level 9 would give the best (smallest) size.
#define COMP_LEVEL 1 // compress level 9 - best size
 
/// Returns the number of bytes occupied by the elements of `vec`
/// (element size times element count); vector bookkeeping is not included.
template<typename T>
size_t vectorsizeof(const typename std::vector<T>& vec)
{
    using element_type = typename std::vector<T>::value_type;
    const size_t element_count = vec.size();
    return element_count * sizeof(element_type);
}
 
int main()
{
    std::chrono::system_clock::time_point start;
    std::chrono::duration<double> sec;
    
    // ------------------
    std::vector<unsigned int> vec;
    for(int i=0; i<DATA_COUNT; i++)
        vec.push_back(i);
 
 
    // -----------------------------------------------
    {
        std::cout << "NORMAL MODE" << std::endl;
 
        start = std::chrono::system_clock::now();
        FILE *fp=fopen("a.dat", "wb");
        fwrite(vec.data(), sizeof(decltype(vec)::value_type), vec.size(), fp);
        fclose(fp);
        sec = std::chrono::system_clock::now() - start;
        std::cout << "#2 Time passed (sec) : " << sec.count() << " seconds" << std::endl;
 
        // ------------------
        start = std::chrono::system_clock::now();
        std::vector<unsigned int> vec2;
        vec2.resize(DATA_COUNT);
        FILE *fp2 = fopen("a.dat", "rb");
        fread(vec2.data(), sizeof(decltype(vec2)::value_type), DATA_COUNT, fp2);
        fclose(fp2);
        sec = std::chrono::system_clock::now() - start;
        std::cout << "#3 Time passed (sec) : " << sec.count() << " seconds" << std::endl;
 
        std::cout << vec2[0] << "\n";
        std::cout << vec2[DATA_COUNT-1] << "\n";
 
    }
 
    // -----------------------------------------------
    {
        std::cout << "COMPRESSION MODE" << std::endl;
 
        start = std::chrono::system_clock::now();
        std::stringstream compressed;
        boost::iostreams::filtering_streambuf<boost::iostreams::input> out;
        out.push(boost::iostreams::lzma_compressor(boost::iostreams::lzma_params(COMP_LEVEL)));
        out.push(boost::iostreams::array_source(reinterpret_cast<const char*>(vec.data()), vectorsizeof(vec)));
        boost::iostreams::copy(out, compressed);
 
        const std::string tmp = compressed.str();
        const char* cstr = tmp.c_str();
 
        FILE *fp=fopen("b.dat", "wb");
        fwrite(cstr, 1, tmp.size(), fp);
        fclose(fp);
 
        sec = std::chrono::system_clock::now() - start;
        std::cout << "#2 Time passed (sec) : " << sec.count() << " seconds" << std::endl;
 
 
        start = std::chrono::system_clock::now();
 
        std::ifstream _compressed("b.dat", std::ios::binary);
        std::stringstream decompressed;
 
        boost::iostreams::filtering_streambuf<boost::iostreams::input> in;
        in.push(boost::iostreams::lzma_decompressor());
        in.push(_compressed);
        boost::iostreams::copy(in, decompressed);
 
        std::vector<unsigned int> vec2;
        vec2.resize(DATA_COUNT);
        memcpy(vec2.data(), decompressed.str().c_str(), vectorsizeof(vec2));
 
        sec = std::chrono::system_clock::now() - start;
        std::cout << "#3 Time passed (sec) : " << sec.count() << " seconds" << std::endl;
 
        std::cout << vec2[0] << "\n";
        std::cout << vec2[DATA_COUNT-1] << "\n";
    }
 
    return 0;
}


raw 데이터 저장 / 압축 후 저장

[MIN@DESKTOP-RSH0QT3 xx]$ g++ -I. a.cpp -lboost_iostreams-mt
[MIN@DESKTOP-RSH0QT3 xx]$ ./a
NORMAL MODE
#2 Time passed (sec) : 0.0039529 seconds
#3 Time passed (sec) : 0.0068222 seconds
0
2999999
COMPRESSION MODE
#2 Time passed (sec) : 0.461739 seconds
#3 Time passed (sec) : 0.111355 seconds
0
2999999


압축된 벡터를 저장하고 있는 파일 사이즈..
저장+읽기 합산 기준으로 약 55배의 시간이 더 드나.. 용량은 약 50배 줄일 수 있음..

-rw-r--r-- 1 MIN 없음  12M Mar 21 13:23 a.dat
-rw-r--r-- 1 MIN 없음 232K Mar 21 13:23 b.dat


수치상으로는 시간이 55배 더 드는데.. 사람이 느끼기엔.. 그닥 큰 시간은 아니라..
이정도로 만족..


Korea Tcl/Tk Community
블로그 이미지 ihmin 님의 블로그
VISITOR 오늘 / 전체