first archive version working, working on new bitstream format
This commit is contained in:
BIN
all.tar.gz
BIN
all.tar.gz
Binary file not shown.
305
ccc.cpp
305
ccc.cpp
@@ -1,3 +1,5 @@
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
@@ -166,26 +168,15 @@ struct symbol {
|
||||
string name;
|
||||
int score;
|
||||
};
|
||||
/**
 * One node of a byte-keyed trie.
 *
 * `children` maps the next byte of a string to the child node reached by it.
 * `token_id` is the id stored by insert() for the string that ends at this
 * node, or -1 when no id has been stored here.
 *
 * Child nodes are allocated with `new` by insert() and nothing in this block
 * frees them. NOTE(review): confirm that teardown is handled elsewhere, or
 * that the trie is meant to live for the whole process.
 */
struct node {
    std::map<unsigned char,node*> children;
    int token_id=-1;
};

/**
 * Records `id` at the trie position spelled by `str`, creating any missing
 * nodes along the way.
 *
 * @param root Trie root; must not be null.
 * @param str  Key to insert. An empty key stores `id` on `root` itself.
 * @param id   Value written to the final node's `token_id`; an id already
 *             stored there is overwritten.
 */
void insert(node* root,const std::string& str,int id) {
    node* curr=root;
    for (const char c:str) {
        // Key by the unsigned byte value so bytes >= 0x80 map consistently
        // whatever the signedness of plain `char`.
        // operator[] value-initializes a missing slot to nullptr, so a single
        // lookup serves both the "create" and the "descend" step.
        node*& slot=curr->children[static_cast<unsigned char>(c)];
        if (slot==nullptr) {
            slot=new node();
        }
        curr=slot;
    }
    curr->token_id=id;
}
|
||||
struct processed_file {
|
||||
string path;
|
||||
uint32_t payload_size;
|
||||
vector<unsigned char> payload;
|
||||
bool is_payload_compressed;
|
||||
#pragma pack(push,1)
/**
 * Fixed-size header placed at the start of the archive output.
 *
 * The struct is packed and its raw bytes are copied into the output buffer,
 * so every field uses a fixed-width type: the layout must not depend on the
 * pointer width of the build target (as `size_t` would). Multi-byte fields
 * are stored in host byte order.
 */
struct header {
    uint8_t sig[3];            // signature bytes identifying the format
    uint8_t flags;             // bit flags describing the sections that follow
    uint64_t size_rec_table;   // stored size of the recurrences table, in bytes
    uint64_t entry_count;      // number of file entries in the archive
    uint64_t size_payload;     // stored size of the payload section, in bytes
};
#pragma pack(pop)
// 3 + 1 + 3*8 bytes; fails the build if packing or field widths ever change.
static_assert(sizeof(header)==28,"archive header must be exactly 28 bytes");
|
||||
map<string,vector<TSNode>> all_tokens;
|
||||
map<string,int> rec_map;
|
||||
vector<string> rec_list;
|
||||
@@ -310,6 +301,7 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
|
||||
vector<bool> out;
|
||||
for (int i=0;i<nodes->size();i++) {
|
||||
string type=string(ts_node_type(nodes->at(i)));
|
||||
string supertext=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
||||
if (type=="#if") {
|
||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IF);
|
||||
print_debug("if");
|
||||
@@ -349,7 +341,7 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
|
||||
} else if (type=="#warning") {
|
||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_WARNING);
|
||||
print_debug("warning");
|
||||
} else if (type=="#pragma") {
|
||||
} else if (type=="#pragma" || (type=="preproc_directive" && supertext=="#pragma")) {
|
||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_PRAGMA);
|
||||
print_debug("pragma");
|
||||
} else if (type=="#line") {
|
||||
@@ -553,131 +545,156 @@ int main(int argc,char **argv) {
|
||||
files_content.push_back(code);
|
||||
}
|
||||
construct_rec_table(files_content,files);
|
||||
vector<processed_file> files_archive;
|
||||
vector<unsigned char> files_archive;
|
||||
vector<size_t> payloads_size;
|
||||
vector<size_t> payloads_start;
|
||||
for (int i=0;i<files_content.size();i++) {
|
||||
processed_file pfile;
|
||||
pfile.path=files[i];
|
||||
auto payload_bytes=process_file_nodes(&(all_tokens.at(pfile.path)),files_content[i],rec_list);
|
||||
vector<unsigned char> payload_compressed;
|
||||
payload_compressed.resize(payload_bytes.size()+payload_bytes.size()/3+128);
|
||||
lzma_stream strm=LZMA_STREAM_INIT;
|
||||
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
|
||||
cout<<"Error: couldn't initialize LZMA compressor for file: "<<files[i]<<endl;
|
||||
return -1;
|
||||
}
|
||||
strm.next_in=payload_bytes.data();
|
||||
strm.avail_in=payload_bytes.size();
|
||||
strm.next_out=payload_compressed.data();
|
||||
strm.avail_out=payload_compressed.size();
|
||||
auto ret=lzma_code(&strm,LZMA_FINISH);
|
||||
if (ret!=LZMA_STREAM_END) {
|
||||
cout<<"Error: couldn't compress payload for file: "<<files[i]<<endl;
|
||||
return -1;
|
||||
}
|
||||
size_t compressed_size=payload_compressed.size()-strm.avail_out;
|
||||
payload_compressed.resize(compressed_size);
|
||||
size_t original_size=payload_bytes.size();
|
||||
lzma_end(&strm);
|
||||
if (compressed_size>=original_size) {
|
||||
pfile.is_payload_compressed=false;
|
||||
pfile.payload=payload_bytes;
|
||||
pfile.payload_size=original_size;
|
||||
} else {
|
||||
pfile.is_payload_compressed=true;
|
||||
pfile.payload=payload_compressed;
|
||||
pfile.payload_size=compressed_size;
|
||||
}
|
||||
auto payload_bytes=process_file_nodes(&(all_tokens.at(files[i])),files_content[i],rec_list);
|
||||
payloads_size.push_back(payload_bytes.size());
|
||||
payloads_start.push_back(files_archive.size());
|
||||
CCC_ADD_COMPOMENT(files_archive,payload_bytes);
|
||||
cout<<i+1<<" file(s) done on "<<files.size()<<": "<<files[i]<<endl;
|
||||
}
|
||||
exit(0);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// auto payload=process_all_nodes(&all_tokens[0],files_content[0],rec_list);
|
||||
// vector<unsigned char> rec_table;
|
||||
// for (int i=0;i<rec_list.size();i++) {
|
||||
// for (auto c:rec_list[i]) {
|
||||
// rec_table.push_back(c);
|
||||
// }
|
||||
// rec_table.push_back('\0');
|
||||
// }
|
||||
// vector<unsigned char> rec_table_compressed;
|
||||
// rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
|
||||
// lzma_stream strm=LZMA_STREAM_INIT;
|
||||
// if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
|
||||
// cout<<"Error: couldn't initialize LZMA compressor."<<endl;
|
||||
// return -1;
|
||||
// }
|
||||
// strm.next_in=rec_table.data();
|
||||
// strm.avail_in=rec_table.size();
|
||||
// strm.next_out=rec_table_compressed.data();
|
||||
// strm.avail_out=rec_table_compressed.size();
|
||||
// lzma_ret ret=lzma_code(&strm,LZMA_FINISH);
|
||||
// if (ret!=LZMA_STREAM_END) {
|
||||
// cout<<"Error: couldn't compress reccurences table."<<endl;
|
||||
// return -1;
|
||||
// }
|
||||
// size_t compressed_size=rec_table_compressed.size()-strm.avail_out;
|
||||
// rec_table_compressed.resize(compressed_size);
|
||||
// size_t original_size=rec_table.size();
|
||||
// lzma_end(&strm);
|
||||
// vector<unsigned char> out;
|
||||
// out.push_back('C');
|
||||
// out.push_back(compressed_size>=original_size?'C':'c');
|
||||
// vector<unsigned char> payload_bytes;
|
||||
// unsigned char current=0;
|
||||
// size_t bit_index=0;
|
||||
// for (bool b:payload) {
|
||||
// current|=(b<<(7-bit_index));
|
||||
// bit_index++;
|
||||
// if (bit_index==8) {
|
||||
// payload_bytes.push_back(current);
|
||||
// current=0;
|
||||
// bit_index=0;
|
||||
// }
|
||||
// }
|
||||
// if (bit_index!=0) {
|
||||
// payload_bytes.push_back(current);
|
||||
// }
|
||||
// vector<unsigned char> payload_compressed;
|
||||
// payload_compressed.resize(payload_bytes.size()+payload_bytes.size()/3+128);
|
||||
// strm=LZMA_STREAM_INIT;
|
||||
// if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
|
||||
// cout<<"Error: couldn't initialize LZMA compressor."<<endl;
|
||||
// return -1;
|
||||
// }
|
||||
// strm.next_in=payload_bytes.data();
|
||||
// strm.avail_in=payload_bytes.size();
|
||||
// strm.next_out=payload_compressed.data();
|
||||
// strm.avail_out=payload_compressed.size();
|
||||
// ret=lzma_code(&strm,LZMA_FINISH);
|
||||
// if (ret!=LZMA_STREAM_END) {
|
||||
// cout<<"Error: couldn't compress reccurences table."<<endl;
|
||||
// return -1;
|
||||
// }
|
||||
// size_t compressed_size1=payload_compressed.size()-strm.avail_out;
|
||||
// payload_compressed.resize(compressed_size1);
|
||||
// size_t original_size1=payload_bytes.size();
|
||||
// lzma_end(&strm);
|
||||
// out.push_back(compressed_size1>=original_size1?'C':'c');
|
||||
// if (compressed_size>=original_size) {
|
||||
// CCC_ADD_COMPOMENT(out,rec_table);
|
||||
// } else {
|
||||
// CCC_ADD_COMPOMENT(out,rec_table_compressed);
|
||||
// }
|
||||
// if (compressed_size1>=original_size1) {
|
||||
// CCC_ADD_COMPOMENT(out,payload_bytes);
|
||||
// } else {
|
||||
// CCC_ADD_COMPOMENT(out,payload_compressed);
|
||||
// }
|
||||
// ofstream fileout("test.ccc",ios::binary);
|
||||
// if (!fileout) {
|
||||
// cout<<"Error: couldn't open output file."<<endl;
|
||||
// return -1;
|
||||
// }
|
||||
// fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
|
||||
// fileout.close();
|
||||
// return 0;
|
||||
vector<unsigned char> payload_compressed;
|
||||
payload_compressed.resize(files_archive.size()+files_archive.size()/3+128);
|
||||
lzma_stream strm=LZMA_STREAM_INIT;
|
||||
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
|
||||
cout<<"Error: couldn't initialize LZMA compressor for file archive."<<endl;
|
||||
return -1;
|
||||
}
|
||||
strm.next_in=files_archive.data();
|
||||
strm.avail_in=files_archive.size();
|
||||
strm.next_out=payload_compressed.data();
|
||||
strm.avail_out=payload_compressed.size();
|
||||
auto ret=lzma_code(&strm,LZMA_FINISH);
|
||||
if (ret!=LZMA_STREAM_END) {
|
||||
cout<<"Error: couldn't compress file archive."<<endl;
|
||||
return -1;
|
||||
}
|
||||
size_t payload_total_size;
|
||||
size_t compressed_size=payload_compressed.size()-strm.avail_out;
|
||||
payload_compressed.resize(compressed_size);
|
||||
size_t original_size=files_archive.size();
|
||||
lzma_end(&strm);
|
||||
uint8_t flags=0;
|
||||
if (compressed_size>=original_size) {
|
||||
flags&= ~(0b00000001);
|
||||
payload_total_size=original_size;
|
||||
} else {
|
||||
flags|=0b00000001;
|
||||
payload_total_size=compressed_size;
|
||||
}
|
||||
vector<unsigned char> rec_table;
|
||||
for (int i=0;i<rec_list.size();i++) {
|
||||
for (auto c:rec_list[i]) {
|
||||
rec_table.push_back(c);
|
||||
}
|
||||
rec_table.push_back('\0');
|
||||
}
|
||||
vector<unsigned char> rec_table_compressed;
|
||||
rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
|
||||
strm=LZMA_STREAM_INIT;
|
||||
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
|
||||
cout<<"Error: couldn't initialize LZMA compressor for reccurences table."<<endl;
|
||||
return -1;
|
||||
}
|
||||
strm.next_in=rec_table.data();
|
||||
strm.avail_in=rec_table.size();
|
||||
strm.next_out=rec_table_compressed.data();
|
||||
strm.avail_out=rec_table_compressed.size();
|
||||
ret=lzma_code(&strm,LZMA_FINISH);
|
||||
if (ret!=LZMA_STREAM_END) {
|
||||
cout<<"Error: couldn't compress reccurences table."<<endl;
|
||||
return -1;
|
||||
}
|
||||
size_t rec_table_total_size;
|
||||
compressed_size=rec_table_compressed.size()-strm.avail_out;
|
||||
rec_table_compressed.resize(compressed_size);
|
||||
original_size=rec_table.size();
|
||||
lzma_end(&strm);
|
||||
if (compressed_size>=original_size) {
|
||||
flags&= ~(0b00000010);
|
||||
rec_table_total_size=original_size;
|
||||
} else {
|
||||
flags|=0b00000010;
|
||||
rec_table_total_size=compressed_size;
|
||||
}
|
||||
vector<unsigned char> files_table;
|
||||
for (int i=0;i<files.size();i++) {
|
||||
for (auto c:files[i]) {
|
||||
files_table.push_back(c);
|
||||
}
|
||||
files_table.push_back('\0');
|
||||
auto file_start=payloads_start[i];
|
||||
for (int i=0;i<sizeof(size_t);++i) {
|
||||
files_table.push_back(((uint8_t*)&file_start)[i]);
|
||||
}
|
||||
auto file_size=payloads_size[i];
|
||||
for (int i=0;i<sizeof(size_t);++i) {
|
||||
files_table.push_back(((uint8_t*)&file_size)[i]);
|
||||
}
|
||||
}
|
||||
vector<unsigned char> files_table_compressed;
|
||||
files_table_compressed.resize(files_table.size()+files_table.size()/3+128);
|
||||
strm=LZMA_STREAM_INIT;
|
||||
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
|
||||
cout<<"Error: couldn't initialize LZMA compressor for files table."<<endl;
|
||||
return -1;
|
||||
}
|
||||
strm.next_in=files_table.data();
|
||||
strm.avail_in=files_table.size();
|
||||
strm.next_out=files_table_compressed.data();
|
||||
strm.avail_out=files_table_compressed.size();
|
||||
ret=lzma_code(&strm,LZMA_FINISH);
|
||||
if (ret!=LZMA_STREAM_END) {
|
||||
cout<<"Error: couldn't compress files table."<<endl;
|
||||
return -1;
|
||||
}
|
||||
size_t files_table_total_size;
|
||||
compressed_size=files_table_compressed.size()-strm.avail_out;
|
||||
files_table_compressed.resize(compressed_size);
|
||||
original_size=files_table.size();
|
||||
lzma_end(&strm);
|
||||
if (compressed_size>=original_size) {
|
||||
flags&= ~(0b00000100);
|
||||
files_table_total_size=original_size;
|
||||
} else {
|
||||
flags|=0b00000100;
|
||||
files_table_total_size=compressed_size;
|
||||
}
|
||||
header head;
|
||||
head.sig[0]='C';
|
||||
head.sig[1]='C';
|
||||
head.sig[2]='C';
|
||||
head.flags=flags;
|
||||
head.size_payload=payload_total_size;
|
||||
head.size_rec_table=rec_table_total_size;
|
||||
head.entry_count=files.size();
|
||||
vector<unsigned char> out;
|
||||
for (int i=0;i<sizeof(header);i++) {
|
||||
out.push_back(((uint8_t*)&head)[i]);
|
||||
}
|
||||
if (flags & 0b00000010) {
|
||||
CCC_ADD_COMPOMENT(out,rec_table_compressed);
|
||||
} else {
|
||||
CCC_ADD_COMPOMENT(out,rec_table);
|
||||
}
|
||||
if (flags & 0b00000100) {
|
||||
CCC_ADD_COMPOMENT(out,files_table_compressed);
|
||||
} else {
|
||||
CCC_ADD_COMPOMENT(out,files_table);
|
||||
}
|
||||
if (flags & 0b00000001) {
|
||||
CCC_ADD_COMPOMENT(out,payload_compressed);
|
||||
} else {
|
||||
CCC_ADD_COMPOMENT(out,files_archive);
|
||||
}
|
||||
ofstream fileout("test.ccc",ios::binary);
|
||||
if (!fileout) {
|
||||
cout<<"Error: couldn't open output file."<<endl;
|
||||
return -1;
|
||||
}
|
||||
fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
|
||||
fileout.close();
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user