First archive version working; now working on the new bitstream format

This commit is contained in:
2026-02-05 21:18:21 +01:00
parent f2640c70a5
commit f281c71f75
3 changed files with 161 additions and 144 deletions

Binary file not shown.

305
ccc.cpp
View File

@@ -1,3 +1,5 @@
#include <cstdint>
#include <cstring>
#include <iostream> #include <iostream>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
@@ -166,26 +168,15 @@ struct symbol {
string name; string name;
int score; int score;
}; };
struct node { #pragma pack(push,1)
map<unsigned char,node*> children; struct header {
int token_id=-1; uint8_t sig[3];
}; uint8_t flags;
void insert(node* root,string str,int id) { size_t size_rec_table;
node* curr=root; size_t entry_count;
for (char c:str) { size_t size_payload;
if (curr->children.find(c)==curr->children.end()) {
curr->children[c]=new node();
}
curr=curr->children[c];
}
curr->token_id=id;
}
struct processed_file {
string path;
uint32_t payload_size;
vector<unsigned char> payload;
bool is_payload_compressed;
}; };
#pragma pack(pop)
map<string,vector<TSNode>> all_tokens; map<string,vector<TSNode>> all_tokens;
map<string,int> rec_map; map<string,int> rec_map;
vector<string> rec_list; vector<string> rec_list;
@@ -310,6 +301,7 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
vector<bool> out; vector<bool> out;
for (int i=0;i<nodes->size();i++) { for (int i=0;i<nodes->size();i++) {
string type=string(ts_node_type(nodes->at(i))); string type=string(ts_node_type(nodes->at(i)));
string supertext=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
if (type=="#if") { if (type=="#if") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IF); CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IF);
print_debug("if"); print_debug("if");
@@ -349,7 +341,7 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
} else if (type=="#warning") { } else if (type=="#warning") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_WARNING); CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_WARNING);
print_debug("warning"); print_debug("warning");
} else if (type=="#pragma") { } else if (type=="#pragma" || (type=="preproc_directive" && supertext=="#pragma")) {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_PRAGMA); CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_PRAGMA);
print_debug("pragma"); print_debug("pragma");
} else if (type=="#line") { } else if (type=="#line") {
@@ -553,131 +545,156 @@ int main(int argc,char **argv) {
files_content.push_back(code); files_content.push_back(code);
} }
construct_rec_table(files_content,files); construct_rec_table(files_content,files);
vector<processed_file> files_archive; vector<unsigned char> files_archive;
vector<size_t> payloads_size;
vector<size_t> payloads_start;
for (int i=0;i<files_content.size();i++) { for (int i=0;i<files_content.size();i++) {
processed_file pfile; auto payload_bytes=process_file_nodes(&(all_tokens.at(files[i])),files_content[i],rec_list);
pfile.path=files[i]; payloads_size.push_back(payload_bytes.size());
auto payload_bytes=process_file_nodes(&(all_tokens.at(pfile.path)),files_content[i],rec_list); payloads_start.push_back(files_archive.size());
vector<unsigned char> payload_compressed; CCC_ADD_COMPOMENT(files_archive,payload_bytes);
payload_compressed.resize(payload_bytes.size()+payload_bytes.size()/3+128);
lzma_stream strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for file: "<<files[i]<<endl;
return -1;
}
strm.next_in=payload_bytes.data();
strm.avail_in=payload_bytes.size();
strm.next_out=payload_compressed.data();
strm.avail_out=payload_compressed.size();
auto ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress payload for file: "<<files[i]<<endl;
return -1;
}
size_t compressed_size=payload_compressed.size()-strm.avail_out;
payload_compressed.resize(compressed_size);
size_t original_size=payload_bytes.size();
lzma_end(&strm);
if (compressed_size>=original_size) {
pfile.is_payload_compressed=false;
pfile.payload=payload_bytes;
pfile.payload_size=original_size;
} else {
pfile.is_payload_compressed=true;
pfile.payload=payload_compressed;
pfile.payload_size=compressed_size;
}
cout<<i+1<<" file(s) done on "<<files.size()<<": "<<files[i]<<endl; cout<<i+1<<" file(s) done on "<<files.size()<<": "<<files[i]<<endl;
} }
exit(0); vector<unsigned char> payload_compressed;
payload_compressed.resize(files_archive.size()+files_archive.size()/3+128);
lzma_stream strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for file archive."<<endl;
return -1;
// auto payload=process_all_nodes(&all_tokens[0],files_content[0],rec_list); }
// vector<unsigned char> rec_table; strm.next_in=files_archive.data();
// for (int i=0;i<rec_list.size();i++) { strm.avail_in=files_archive.size();
// for (auto c:rec_list[i]) { strm.next_out=payload_compressed.data();
// rec_table.push_back(c); strm.avail_out=payload_compressed.size();
// } auto ret=lzma_code(&strm,LZMA_FINISH);
// rec_table.push_back('\0'); if (ret!=LZMA_STREAM_END) {
// } cout<<"Error: couldn't compress file archive."<<endl;
// vector<unsigned char> rec_table_compressed; return -1;
// rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128); }
// lzma_stream strm=LZMA_STREAM_INIT; size_t payload_total_size;
// if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) { size_t compressed_size=payload_compressed.size()-strm.avail_out;
// cout<<"Error: couldn't initialize LZMA compressor."<<endl; payload_compressed.resize(compressed_size);
// return -1; size_t original_size=files_archive.size();
// } lzma_end(&strm);
// strm.next_in=rec_table.data(); uint8_t flags=0;
// strm.avail_in=rec_table.size(); if (compressed_size>=original_size) {
// strm.next_out=rec_table_compressed.data(); flags&= ~(0b00000001);
// strm.avail_out=rec_table_compressed.size(); payload_total_size=original_size;
// lzma_ret ret=lzma_code(&strm,LZMA_FINISH); } else {
// if (ret!=LZMA_STREAM_END) { flags|=0b00000001;
// cout<<"Error: couldn't compress reccurences table."<<endl; payload_total_size=compressed_size;
// return -1; }
// } vector<unsigned char> rec_table;
// size_t compressed_size=rec_table_compressed.size()-strm.avail_out; for (int i=0;i<rec_list.size();i++) {
// rec_table_compressed.resize(compressed_size); for (auto c:rec_list[i]) {
// size_t original_size=rec_table.size(); rec_table.push_back(c);
// lzma_end(&strm); }
// vector<unsigned char> out; rec_table.push_back('\0');
// out.push_back('C'); }
// out.push_back(compressed_size>=original_size?'C':'c'); vector<unsigned char> rec_table_compressed;
// vector<unsigned char> payload_bytes; rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
// unsigned char current=0; strm=LZMA_STREAM_INIT;
// size_t bit_index=0; if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
// for (bool b:payload) { cout<<"Error: couldn't initialize LZMA compressor for reccurences table."<<endl;
// current|=(b<<(7-bit_index)); return -1;
// bit_index++; }
// if (bit_index==8) { strm.next_in=rec_table.data();
// payload_bytes.push_back(current); strm.avail_in=rec_table.size();
// current=0; strm.next_out=rec_table_compressed.data();
// bit_index=0; strm.avail_out=rec_table_compressed.size();
// } ret=lzma_code(&strm,LZMA_FINISH);
// } if (ret!=LZMA_STREAM_END) {
// if (bit_index!=0) { cout<<"Error: couldn't compress reccurences table."<<endl;
// payload_bytes.push_back(current); return -1;
// } }
// vector<unsigned char> payload_compressed; size_t rec_table_total_size;
// payload_compressed.resize(payload_bytes.size()+payload_bytes.size()/3+128); compressed_size=rec_table_compressed.size()-strm.avail_out;
// strm=LZMA_STREAM_INIT; rec_table_compressed.resize(compressed_size);
// if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) { original_size=rec_table.size();
// cout<<"Error: couldn't initialize LZMA compressor."<<endl; lzma_end(&strm);
// return -1; if (compressed_size>=original_size) {
// } flags&= ~(0b00000010);
// strm.next_in=payload_bytes.data(); rec_table_total_size=original_size;
// strm.avail_in=payload_bytes.size(); } else {
// strm.next_out=payload_compressed.data(); flags|=0b00000010;
// strm.avail_out=payload_compressed.size(); rec_table_total_size=compressed_size;
// ret=lzma_code(&strm,LZMA_FINISH); }
// if (ret!=LZMA_STREAM_END) { vector<unsigned char> files_table;
// cout<<"Error: couldn't compress reccurences table."<<endl; for (int i=0;i<files.size();i++) {
// return -1; for (auto c:files[i]) {
// } files_table.push_back(c);
// size_t compressed_size1=payload_compressed.size()-strm.avail_out; }
// payload_compressed.resize(compressed_size1); files_table.push_back('\0');
// size_t original_size1=payload_bytes.size(); auto file_start=payloads_start[i];
// lzma_end(&strm); for (int i=0;i<sizeof(size_t);++i) {
// out.push_back(compressed_size1>=original_size1?'C':'c'); files_table.push_back(((uint8_t*)&file_start)[i]);
// if (compressed_size>=original_size) { }
// CCC_ADD_COMPOMENT(out,rec_table); auto file_size=payloads_size[i];
// } else { for (int i=0;i<sizeof(size_t);++i) {
// CCC_ADD_COMPOMENT(out,rec_table_compressed); files_table.push_back(((uint8_t*)&file_size)[i]);
// } }
// if (compressed_size1>=original_size1) { }
// CCC_ADD_COMPOMENT(out,payload_bytes); vector<unsigned char> files_table_compressed;
// } else { files_table_compressed.resize(files_table.size()+files_table.size()/3+128);
// CCC_ADD_COMPOMENT(out,payload_compressed); strm=LZMA_STREAM_INIT;
// } if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
// ofstream fileout("test.ccc",ios::binary); cout<<"Error: couldn't initialize LZMA compressor for files table."<<endl;
// if (!fileout) { return -1;
// cout<<"Error: couldn't open output file."<<endl; }
// return -1; strm.next_in=files_table.data();
// } strm.avail_in=files_table.size();
// fileout.write(reinterpret_cast<const char*>(out.data()),out.size()); strm.next_out=files_table_compressed.data();
// fileout.close(); strm.avail_out=files_table_compressed.size();
// return 0; ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress files table."<<endl;
return -1;
}
size_t files_table_total_size;
compressed_size=files_table_compressed.size()-strm.avail_out;
files_table_compressed.resize(compressed_size);
original_size=files_table.size();
lzma_end(&strm);
if (compressed_size>=original_size) {
flags&= ~(0b00000100);
files_table_total_size=original_size;
} else {
flags|=0b00000100;
files_table_total_size=compressed_size;
}
header head;
head.sig[0]='C';
head.sig[1]='C';
head.sig[2]='C';
head.flags=flags;
head.size_payload=payload_total_size;
head.size_rec_table=rec_table_total_size;
head.entry_count=files.size();
vector<unsigned char> out;
for (int i=0;i<sizeof(header);i++) {
out.push_back(((uint8_t*)&head)[i]);
}
if (flags & 0b00000010) {
CCC_ADD_COMPOMENT(out,rec_table_compressed);
} else {
CCC_ADD_COMPOMENT(out,rec_table);
}
if (flags & 0b00000100) {
CCC_ADD_COMPOMENT(out,files_table_compressed);
} else {
CCC_ADD_COMPOMENT(out,files_table);
}
if (flags & 0b00000001) {
CCC_ADD_COMPOMENT(out,payload_compressed);
} else {
CCC_ADD_COMPOMENT(out,files_archive);
}
ofstream fileout("test.ccc",ios::binary);
if (!fileout) {
cout<<"Error: couldn't open output file."<<endl;
return -1;
}
fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
fileout.close();
return 0;
} }

BIN
test.ccc Normal file

Binary file not shown.