first archive version working, working on new bitstream format

This commit is contained in:
2026-02-05 21:18:21 +01:00
parent f2640c70a5
commit f281c71f75
3 changed files with 161 additions and 144 deletions

Binary file not shown.

305
ccc.cpp
View File

@@ -1,3 +1,5 @@
#include <cstdint>
#include <cstring>
#include <iostream>
#include <filesystem>
#include <fstream>
@@ -166,26 +168,15 @@ struct symbol {
string name;
int score;
};
// Trie node keyed by the next byte of a string.
//  - children: maps a byte value to the child node reached through that byte.
//  - token_id: id stored by insert() for the string ending at this node,
//              or -1 when no string ends here.
// NOTE(review): children are allocated with `new` in insert() and nothing in
// this block releases them — confirm whether the trie is meant to live until
// process exit or needs an explicit teardown.
struct node {
    std::map<unsigned char,node*> children;
    int token_id=-1;
};

// Inserts `str` into the trie rooted at `root` and records `id` on the node
// reached after the last byte. An empty `str` sets the id on `root` itself.
// Inserting the same string again overwrites the previously stored id.
//
// root: trie root; a null root is ignored (nothing is inserted).
// str:  byte string to insert; taken by const reference to avoid a copy.
// id:   token id to store on the terminal node.
void insert(node* root,const std::string& str,int id) {
    if (root==nullptr) {
        return;
    }
    node* curr=root;
    for (const char c:str) {
        // Keys are unsigned bytes, so bytes >= 0x80 map to 128..255
        // regardless of whether plain `char` is signed on this platform.
        const auto key=static_cast<unsigned char>(c);
        // Single lookup: operator[] creates a null slot when the byte is new,
        // and the reference lets us fill it in place.
        node*& child=curr->children[key];
        if (child==nullptr) {
            child=new node();
        }
        curr=child;
    }
    curr->token_id=id;
}
// Result of encoding one input file, as filled in by main's per-file loop.
struct processed_file {
// Path of the source file this entry was produced from.
string path;
// Number of bytes held in `payload` (compressed size when compressed,
// original size otherwise).
uint32_t payload_size;
// Encoded bytes for the file: either the output of process_file_nodes or its
// LZMA-compressed form, whichever main decided to keep.
vector<unsigned char> payload;
// True when `payload` holds the LZMA-compressed bytes; main only keeps the
// compressed form when it is strictly smaller than the original.
bool is_payload_compressed;
// Archive header, emitted at the very start of the output by copying the
// struct's bytes as-is (so fields are stored in host byte order).
//
// The size/count fields use fixed-width 64-bit integers rather than size_t so
// the on-disk layout does not depend on the platform's word size; on 64-bit
// targets the layout is byte-identical to the previous size_t version.
#pragma pack(push,1)
struct header {
    // File signature; main writes 'C','C','C'.
    uint8_t sig[3];
    // Compression flags set by main:
    //   bit 0 (0x01) - file payload section is LZMA-compressed
    //   bit 1 (0x02) - recurrences table is LZMA-compressed
    //   bit 2 (0x04) - files table is LZMA-compressed
    uint8_t flags;
    // Stored size in bytes of the recurrences table (compressed size when
    // bit 1 is set, raw size otherwise).
    uint64_t size_rec_table;
    // Number of file entries in the archive.
    uint64_t entry_count;
    // Stored size in bytes of the payload section (compressed size when
    // bit 0 is set, raw size otherwise).
    uint64_t size_payload;
};
#pragma pack(pop)
// Lock the packed layout: 3-byte signature + 1 flag byte + three 64-bit fields.
static_assert(sizeof(header)==3+1+3*sizeof(uint64_t),"header must be packed to 28 bytes");
// Tree-sitter nodes collected per input file, looked up by file path
// (main fetches the entry for each file before encoding it).
map<string,vector<TSNode>> all_tokens;
// NOTE(review): presumably maps a recurring string to a count or to its index
// in rec_list — its use is not visible in this part of the file; confirm.
map<string,int> rec_map;
// List of recurring strings; passed to process_file_nodes and serialized by
// main as consecutive NUL-terminated entries forming the recurrences table.
vector<string> rec_list;
@@ -310,6 +301,7 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
vector<bool> out;
for (int i=0;i<nodes->size();i++) {
string type=string(ts_node_type(nodes->at(i)));
string supertext=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
if (type=="#if") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IF);
print_debug("if");
@@ -349,7 +341,7 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
} else if (type=="#warning") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_WARNING);
print_debug("warning");
} else if (type=="#pragma") {
} else if (type=="#pragma" || (type=="preproc_directive" && supertext=="#pragma")) {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_PRAGMA);
print_debug("pragma");
} else if (type=="#line") {
@@ -553,131 +545,156 @@ int main(int argc,char **argv) {
files_content.push_back(code);
}
construct_rec_table(files_content,files);
vector<processed_file> files_archive;
vector<unsigned char> files_archive;
vector<size_t> payloads_size;
vector<size_t> payloads_start;
for (int i=0;i<files_content.size();i++) {
processed_file pfile;
pfile.path=files[i];
auto payload_bytes=process_file_nodes(&(all_tokens.at(pfile.path)),files_content[i],rec_list);
vector<unsigned char> payload_compressed;
payload_compressed.resize(payload_bytes.size()+payload_bytes.size()/3+128);
lzma_stream strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for file: "<<files[i]<<endl;
return -1;
}
strm.next_in=payload_bytes.data();
strm.avail_in=payload_bytes.size();
strm.next_out=payload_compressed.data();
strm.avail_out=payload_compressed.size();
auto ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress payload for file: "<<files[i]<<endl;
return -1;
}
size_t compressed_size=payload_compressed.size()-strm.avail_out;
payload_compressed.resize(compressed_size);
size_t original_size=payload_bytes.size();
lzma_end(&strm);
if (compressed_size>=original_size) {
pfile.is_payload_compressed=false;
pfile.payload=payload_bytes;
pfile.payload_size=original_size;
} else {
pfile.is_payload_compressed=true;
pfile.payload=payload_compressed;
pfile.payload_size=compressed_size;
}
auto payload_bytes=process_file_nodes(&(all_tokens.at(files[i])),files_content[i],rec_list);
payloads_size.push_back(payload_bytes.size());
payloads_start.push_back(files_archive.size());
CCC_ADD_COMPOMENT(files_archive,payload_bytes);
cout<<i+1<<" file(s) done on "<<files.size()<<": "<<files[i]<<endl;
}
exit(0);
// auto payload=process_all_nodes(&all_tokens[0],files_content[0],rec_list);
// vector<unsigned char> rec_table;
// for (int i=0;i<rec_list.size();i++) {
// for (auto c:rec_list[i]) {
// rec_table.push_back(c);
// }
// rec_table.push_back('\0');
// }
// vector<unsigned char> rec_table_compressed;
// rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
// lzma_stream strm=LZMA_STREAM_INIT;
// if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
// cout<<"Error: couldn't initialize LZMA compressor."<<endl;
// return -1;
// }
// strm.next_in=rec_table.data();
// strm.avail_in=rec_table.size();
// strm.next_out=rec_table_compressed.data();
// strm.avail_out=rec_table_compressed.size();
// lzma_ret ret=lzma_code(&strm,LZMA_FINISH);
// if (ret!=LZMA_STREAM_END) {
// cout<<"Error: couldn't compress reccurences table."<<endl;
// return -1;
// }
// size_t compressed_size=rec_table_compressed.size()-strm.avail_out;
// rec_table_compressed.resize(compressed_size);
// size_t original_size=rec_table.size();
// lzma_end(&strm);
// vector<unsigned char> out;
// out.push_back('C');
// out.push_back(compressed_size>=original_size?'C':'c');
// vector<unsigned char> payload_bytes;
// unsigned char current=0;
// size_t bit_index=0;
// for (bool b:payload) {
// current|=(b<<(7-bit_index));
// bit_index++;
// if (bit_index==8) {
// payload_bytes.push_back(current);
// current=0;
// bit_index=0;
// }
// }
// if (bit_index!=0) {
// payload_bytes.push_back(current);
// }
// vector<unsigned char> payload_compressed;
// payload_compressed.resize(payload_bytes.size()+payload_bytes.size()/3+128);
// strm=LZMA_STREAM_INIT;
// if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
// cout<<"Error: couldn't initialize LZMA compressor."<<endl;
// return -1;
// }
// strm.next_in=payload_bytes.data();
// strm.avail_in=payload_bytes.size();
// strm.next_out=payload_compressed.data();
// strm.avail_out=payload_compressed.size();
// ret=lzma_code(&strm,LZMA_FINISH);
// if (ret!=LZMA_STREAM_END) {
// cout<<"Error: couldn't compress reccurences table."<<endl;
// return -1;
// }
// size_t compressed_size1=payload_compressed.size()-strm.avail_out;
// payload_compressed.resize(compressed_size1);
// size_t original_size1=payload_bytes.size();
// lzma_end(&strm);
// out.push_back(compressed_size1>=original_size1?'C':'c');
// if (compressed_size>=original_size) {
// CCC_ADD_COMPOMENT(out,rec_table);
// } else {
// CCC_ADD_COMPOMENT(out,rec_table_compressed);
// }
// if (compressed_size1>=original_size1) {
// CCC_ADD_COMPOMENT(out,payload_bytes);
// } else {
// CCC_ADD_COMPOMENT(out,payload_compressed);
// }
// ofstream fileout("test.ccc",ios::binary);
// if (!fileout) {
// cout<<"Error: couldn't open output file."<<endl;
// return -1;
// }
// fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
// fileout.close();
// return 0;
vector<unsigned char> payload_compressed;
payload_compressed.resize(files_archive.size()+files_archive.size()/3+128);
lzma_stream strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for file archive."<<endl;
return -1;
}
strm.next_in=files_archive.data();
strm.avail_in=files_archive.size();
strm.next_out=payload_compressed.data();
strm.avail_out=payload_compressed.size();
auto ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress file archive."<<endl;
return -1;
}
size_t payload_total_size;
size_t compressed_size=payload_compressed.size()-strm.avail_out;
payload_compressed.resize(compressed_size);
size_t original_size=files_archive.size();
lzma_end(&strm);
uint8_t flags=0;
if (compressed_size>=original_size) {
flags&= ~(0b00000001);
payload_total_size=original_size;
} else {
flags|=0b00000001;
payload_total_size=compressed_size;
}
vector<unsigned char> rec_table;
for (int i=0;i<rec_list.size();i++) {
for (auto c:rec_list[i]) {
rec_table.push_back(c);
}
rec_table.push_back('\0');
}
vector<unsigned char> rec_table_compressed;
rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for reccurences table."<<endl;
return -1;
}
strm.next_in=rec_table.data();
strm.avail_in=rec_table.size();
strm.next_out=rec_table_compressed.data();
strm.avail_out=rec_table_compressed.size();
ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress reccurences table."<<endl;
return -1;
}
size_t rec_table_total_size;
compressed_size=rec_table_compressed.size()-strm.avail_out;
rec_table_compressed.resize(compressed_size);
original_size=rec_table.size();
lzma_end(&strm);
if (compressed_size>=original_size) {
flags&= ~(0b00000010);
rec_table_total_size=original_size;
} else {
flags|=0b00000010;
rec_table_total_size=compressed_size;
}
vector<unsigned char> files_table;
for (int i=0;i<files.size();i++) {
for (auto c:files[i]) {
files_table.push_back(c);
}
files_table.push_back('\0');
auto file_start=payloads_start[i];
for (int i=0;i<sizeof(size_t);++i) {
files_table.push_back(((uint8_t*)&file_start)[i]);
}
auto file_size=payloads_size[i];
for (int i=0;i<sizeof(size_t);++i) {
files_table.push_back(((uint8_t*)&file_size)[i]);
}
}
vector<unsigned char> files_table_compressed;
files_table_compressed.resize(files_table.size()+files_table.size()/3+128);
strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for files table."<<endl;
return -1;
}
strm.next_in=files_table.data();
strm.avail_in=files_table.size();
strm.next_out=files_table_compressed.data();
strm.avail_out=files_table_compressed.size();
ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress files table."<<endl;
return -1;
}
size_t files_table_total_size;
compressed_size=files_table_compressed.size()-strm.avail_out;
files_table_compressed.resize(compressed_size);
original_size=files_table.size();
lzma_end(&strm);
if (compressed_size>=original_size) {
flags&= ~(0b00000100);
files_table_total_size=original_size;
} else {
flags|=0b00000100;
files_table_total_size=compressed_size;
}
header head;
head.sig[0]='C';
head.sig[1]='C';
head.sig[2]='C';
head.flags=flags;
head.size_payload=payload_total_size;
head.size_rec_table=rec_table_total_size;
head.entry_count=files.size();
vector<unsigned char> out;
for (int i=0;i<sizeof(header);i++) {
out.push_back(((uint8_t*)&head)[i]);
}
if (flags & 0b00000010) {
CCC_ADD_COMPOMENT(out,rec_table_compressed);
} else {
CCC_ADD_COMPOMENT(out,rec_table);
}
if (flags & 0b00000100) {
CCC_ADD_COMPOMENT(out,files_table_compressed);
} else {
CCC_ADD_COMPOMENT(out,files_table);
}
if (flags & 0b00000001) {
CCC_ADD_COMPOMENT(out,payload_compressed);
} else {
CCC_ADD_COMPOMENT(out,files_archive);
}
ofstream fileout("test.ccc",ios::binary);
if (!fileout) {
cout<<"Error: couldn't open output file."<<endl;
return -1;
}
fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
fileout.close();
return 0;
}

BIN
test.ccc Normal file

Binary file not shown.