From 75e84abf4cb7cdc6fa01e95ca0e0ff120a5aea82 Mon Sep 17 00:00:00 2001 From: lolo859 Date: Tue, 10 Feb 2026 23:22:08 +0100 Subject: [PATCH] final stage --- build.sh | 6 +- ccc.cpp | 724 ++++++++++++++++++++++++++++++++++++------------------- hello.c | 5 + test.py | 57 +++-- test.sh | 9 + 5 files changed, 539 insertions(+), 262 deletions(-) create mode 100644 hello.c create mode 100755 test.sh diff --git a/build.sh b/build.sh index 6d71709..84e3500 100755 --- a/build.sh +++ b/build.sh @@ -1 +1,5 @@ -g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma -Ofast -march=native +# git clone https://github.com/facebook/zstd.git +# cd zstd +# exit +# make -j$(nproc) CXXFLAGS="-DZSTD_MULTITHREAD_SUPPORT -DZSTD_MULTITHREAD" +g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -lzstd -lxxhash -llzma -Ofast -march=native diff --git a/ccc.cpp b/ccc.cpp index ab8d528..e59d99d 100644 --- a/ccc.cpp +++ b/ccc.cpp @@ -1,49 +1,90 @@ #include +#include #include +#include #include #include #include +#include #include #include -#include +#include +#include #include +#include +#include #include #include +#include +#include +#include +#include #include #include #include #include -#include +#include using namespace std; namespace fs=filesystem; -const vector CCC_DELIMITER_0_HEAD={0}; -const vector CCC_DELIMITER_1_HEAD={1,0}; -const vector CCC_C_KEYWORD_HEAD={1,1,0,0}; -const vector CCC_MISCELANEOUS_HEAD={1,1,0,1}; -const vector CCC_STRING_INLINE_HEAD={1,1,1,0}; -const vector CCC_REC_TABLE_REF_HEAD={1,1,1,1}; -const vector CCC_STRING_INLINE_END={0,0,0,0,0,0,0,0}; +const uint64_t CCC_DELIMITER_0_HEAD=0b0; +const uint64_t CCC_DELIMITER_1_HEAD=0b10; +const uint64_t CCC_C_KEYWORD_HEAD=0b1100; +const uint64_t CCC_MISCELANEOUS_HEAD=0b1101; +const uint64_t CCC_STRING_INLINE_HEAD=0b1110; +const uint64_t CCC_REC_TABLE_REF_HEAD=0b1111; +const uint64_t CCC_STRING_INLINE_END=0b00000000; #define CCC_ADD_COMPONENT(vec,tail) \ do { \ auto tmp=tail; \ vec.insert(vec.end(),tmp.begin(),tmp.end()); \ } while (0) -#define CCC_ADD_COMPONENT_ALIGNED(vec,tail) \ - do { \ - static_assert(is_same_v>,"vec must be vector"); \ - static_assert(is_same_v>,"tail must be vector"); \ - vec.reserve(vec.size()+tail.size()+8); \ - for (auto b:tail) vec.push_back(b); \ - size_t rem=vec.size()%8; \ - if (rem!=0) { \ - vec.insert(vec.end(),8-rem,false); \ - } \ - } while (0) struct XXH3HasherString { size_t operator()(const std::string& s) const { return static_cast(XXH3_64bits(s.data(),s.size())); } }; +class bit_streamer { + private: + vector out; + uint8_t current_byte=0; + uint8_t bit_pos=0; + public: + size_t index; + bit_streamer(size_t index) { + out.reserve(1024*1024); + this->index=index; + } + size_t get_size() { + return out.size(); + } + void write_bits(uint64_t value,uint8_t count) { + for (int i=count-1;i>=0;--i) { + if ((value>>i) & 1) { + current_byte|=(1<<(7-bit_pos)); + } + bit_pos++; + if (bit_pos==8) { + out.push_back(current_byte); + current_byte=0; + bit_pos=0; + } + } + } + void align() { + if (bit_pos>0) { + out.push_back(current_byte); + current_byte=0; + bit_pos=0; + } + } + const vector& get_out() const { + return out; + } + vector extract_buffer() { + align(); + return std::move(out); + } +}; const vector delimiter0={ "{", "}", @@ -175,10 +216,6 @@ const vector c_keywords={ "__attribute__", "defined", }; -struct symbol { - string name; - int score; -}; #pragma pack(push,1) struct header { uint8_t sig[3]; @@ -193,115 +230,182 @@ struct node { uint32_t start; uint32_t end; }; -unordered_map optimized_type_u16_list; -unordered_map type_map; -vector> all_tokens; -map rec_map; +struct file_entry { + string name; + size_t size; + size_t index; +}; +struct thread_iterate_input_loop_call { + string &source_code; + vector &thread_local_node_list; + unordered_map& thread_local_type_map; + unordered_map& thread_local_type_u16_map; + uint16_t thread_local_next_type_id; + unordered_map thread_local_rec_map; +}; +struct thread_rec_map_result { + unordered_map thread_local_rec_map; +}; +struct thread_encoding_input_loop_call { + string &source_code; + vector &node_list; + unordered_map& thread_local_type_map; + bit_streamer& thread_local_bit_stream; +}; +enum iterating_mode { + REC_MAP, + PARSING +}; +queue rec_map_files_queue; +mutex rec_map_queue_mutex; +queue encoding_files_queue; +mutex encoding_queue_mutex; vector rec_list; -unordered_map rec_lookup; -unordered_map c_keyword_lookup; -unordered_map miscelaneous_lookup; -unordered_map delimiter0_lookup; -unordered_map delimiter1_lookup; -bool debug=false; +unordered_map> rec_lookup; +unordered_map> c_keyword_lookup; +unordered_map> miscelaneous_lookup; +unordered_map> delimiter0_lookup; +unordered_map> delimiter1_lookup; +bool show_warning=false; bool fail_on_warning=false; -static uint16_t next_type_id; -void get_all_nodes(TSNode node,const string &source_code,map &rec_map,size_t index) { - if (ts_node_child_count(node)==0) { - string text=source_code.substr(ts_node_start_byte(node),ts_node_end_byte(node)-ts_node_start_byte(node)); - string type=string(ts_node_type(node)); - if (optimized_type_u16_list.find(type)==optimized_type_u16_list.end()) { - optimized_type_u16_list[type]=next_type_id; - type_map[optimized_type_u16_list.at(type)]=type; - next_type_id++; - } - all_tokens[index].push_back({.type=optimized_type_u16_list[type],.start=ts_node_start_byte(node),.end=ts_node_end_byte(node)}); - if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="type_identifier" || type=="field_identifier" || type=="escape_sequence" || type=="statement_identifier") { - rec_map[text]++; - } - if (type=="primitive_type" && find(c_keywords.begin(),c_keywords.end(),text)==c_keywords.end()) { - rec_map[text]++; - } - if (type=="comment") { - rec_map[text]=2; +bool enable_malloc_trim=true; +void iterate_all_nodes_loop_call(thread_iterate_input_loop_call &settings,TSNode current_node,iterating_mode mode) { + if (ts_node_child_count(current_node)==0) { + uint32_t start=ts_node_start_byte(current_node); + uint32_t end=ts_node_end_byte(current_node); + string_view text{settings.source_code.data()+start,end-start}; + string type=string(ts_node_type(current_node)); + if (mode==iterating_mode::REC_MAP) { + if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="type_identifier" || type=="field_identifier" || type=="escape_sequence" || type=="statement_identifier") { + settings.thread_local_rec_map[string(text)]++; + } + if (type=="primitive_type" && find(c_keywords.begin(),c_keywords.end(),text)==c_keywords.end()) { + settings.thread_local_rec_map[string(text)]++; + } + if (type=="comment") { + settings.thread_local_rec_map[string(text)]=2; + } + } else if (mode==iterating_mode::PARSING) { + if (settings.thread_local_type_u16_map.find(type)==settings.thread_local_type_u16_map.end()) { + settings.thread_local_type_u16_map[type]=settings.thread_local_next_type_id; + settings.thread_local_type_map[settings.thread_local_type_u16_map.at(type)]=type; + settings.thread_local_next_type_id++; + } + settings.thread_local_node_list.push_back({.type=settings.thread_local_type_u16_map[type],.start=start,.end=end}); } } else { - uint32_t child_count=ts_node_child_count(node); + uint32_t child_count=ts_node_child_count(current_node); for (uint32_t i=0;i byte_to_bits(unsigned char c) { - vector out; - for (int i=7;i>=0;i--) { - bool enabled=(c>>i)&0x01; - out.push_back(enabled); +thread_rec_map_result run_thread_rec_map(size_t thread_num) { + auto start=chrono::high_resolution_clock::now(); + thread_rec_map_result res; + unordered_map useless_type_u16_map; + vector useless_node_vector; + unordered_map useless_type_map; + TSParser *parser=ts_parser_new(); + ts_parser_set_language(parser,tree_sitter_c()); + int counter=0; + while (true) { + file_entry f; + { + lock_guard lock(rec_map_queue_mutex); + if (rec_map_files_queue.empty()) break; + f=std::move(rec_map_files_queue.front()); + rec_map_files_queue.pop(); + } + ifstream file(f.name); + if (!file) { + cout<<"Error: couldn't open "<(file)),istreambuf_iterator()); + file.close(); + thread_iterate_input_loop_call loop_settings { + .source_code=code, + .thread_local_node_list=useless_node_vector, + .thread_local_type_map=useless_type_map, + .thread_local_type_u16_map=useless_type_u16_map, + .thread_local_next_type_id=0, + .thread_local_rec_map=res.thread_local_rec_map + }; + TSTree *tree=ts_parser_parse_string(parser,nullptr,code.c_str(),code.size()); + TSNode root=ts_tree_root_node(tree); + loop_settings.source_code=code; + iterate_all_nodes_loop_call(loop_settings,root,iterating_mode::REC_MAP); + string().swap(code); + ts_tree_delete(tree); + { + lock_guard lock(encoding_queue_mutex); + encoding_files_queue.push(std::move(f)); + } + if (++counter%20==0 && enable_malloc_trim) malloc_trim(0); } - return out; + ts_parser_delete(parser); + auto end=chrono::high_resolution_clock::now(); + auto ms=chrono::duration_cast(end-start).count(); + cout<<"Recccurences map thread number "< generate_c_keyword(size_t index) { - vector out; - CCC_ADD_COMPONENT(out,CCC_C_KEYWORD_HEAD); - for (int i=5;i>=0;i--) { - bool enabled=(index>>i)&0x01; - out.push_back(enabled); - } - return out; +void generate_c_keyword(bit_streamer& bitstream,size_t index) { + bitstream.align(); + bitstream.write_bits(CCC_C_KEYWORD_HEAD,4); + bitstream.write_bits(index,6); + bitstream.align(); + return; } -vector generate_rec(size_t index,size_t total_recs) { - vector out; +void generate_rec(bit_streamer& bitstream,size_t index,size_t total_recs) { size_t bits=0; while (total_recs) { total_recs>>=1; ++bits; } - CCC_ADD_COMPONENT(out,CCC_REC_TABLE_REF_HEAD); - for (int i=bits;i>=0;i--) { - bool enabled=(index>>i)&0x01; - out.push_back(enabled); - } - return out; + bitstream.align(); + bitstream.write_bits(CCC_REC_TABLE_REF_HEAD,4); + bitstream.write_bits(index,bits); + bitstream.align(); + return; } -vector generate_delimiter0(size_t index) { - vector out; - CCC_ADD_COMPONENT(out,CCC_DELIMITER_0_HEAD); - for (int i=2;i>=0;i--) { - bool enabled=(index>>i)&0x01; - out.push_back(enabled); - } - return out; +void generate_delimiter0(bit_streamer& bitstream,size_t index) { + bitstream.align(); + bitstream.write_bits(CCC_DELIMITER_0_HEAD,1); + bitstream.write_bits(index,3); + bitstream.align(); + return; } -vector generate_delimiter1(size_t index) { - vector out; - CCC_ADD_COMPONENT(out,CCC_DELIMITER_1_HEAD); - for (int i=1;i>=0;i--) { - bool enabled=(index>>i)&0x01; - out.push_back(enabled); - } - return out; +void generate_delimiter1(bit_streamer& bitstream,size_t index) { + bitstream.align(); + bitstream.write_bits(CCC_DELIMITER_1_HEAD,2); + bitstream.write_bits(index,2); + bitstream.align(); + return; } -vector generate_miscellaneous(size_t index) { - vector out; - CCC_ADD_COMPONENT(out,CCC_MISCELANEOUS_HEAD); - for (int i=5;i>=0;i--) { - bool enabled=(index>>i)&0x01; - out.push_back(enabled); - } - return out; +void generate_miscellaneous(bit_streamer& bitstream,size_t index) { + bitstream.align(); + bitstream.write_bits(CCC_MISCELANEOUS_HEAD,4); + bitstream.write_bits(index,6); + bitstream.align(); } -vector generate_string_content(string str) { - vector out; - CCC_ADD_COMPONENT(out,CCC_STRING_INLINE_HEAD); - for (auto c:str) { - CCC_ADD_COMPONENT(out,byte_to_bits(c)); +void generate_string_content(bit_streamer& bitstream,const char *text,size_t text_len) { + bitstream.align(); + bitstream.write_bits(CCC_STRING_INLINE_HEAD,4); + for (int i=0;i process_file_nodes(vector *nodes,string code) { - vector out; - for (int i=0;isize();i++) { - node n=nodes->at(i); - string type=type_map[n.type]; - string text=code.substr(n.start,n.end-n.start); +void process_file_nodes_loop_call(thread_encoding_input_loop_call& settings) { + bit_streamer& out=settings.thread_local_bit_stream; + for (int i=0;isecond; - CCC_ADD_COMPONENT_ALIGNED(out,generate_rec(index,rec_list.size())); - print_debug("rec_table for string ("+type+"): "+text); + generate_rec(out,index,rec_list.size()); } } else if (type=="primitive_type" || type=="type_identifier") { - auto it=c_keyword_lookup.find(text); + auto it=c_keyword_lookup.find(string(text)); if (it!=c_keyword_lookup.end()) { size_t index=it->second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_c_keyword(index)); - print_debug("type found in c keyword: "+text); + generate_c_keyword(out,index); } else { - auto it=rec_lookup.find(text); + auto it=rec_lookup.find(string(text)); if (it==rec_lookup.end()) { if (!text.empty()) { - CCC_ADD_COMPONENT_ALIGNED(out,generate_string_content(text)); - print_debug("string for type ("+type+"): "+text); + generate_string_content(out,text.data(),text.size()); } else { - cout<<"Warning: type node is empty: "<second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_rec(index,rec_list.size())); - print_debug("rec_table for string for type ("+type+"): "+text); + generate_rec(out,index,rec_list.size()); } } } else if (delimiter0_lookup.find(type)!=delimiter0_lookup.end() || delimiter1_lookup.find(type)!=delimiter1_lookup.end() || type=="\"") { string insert; - if (type=="(" && i+1size()) { - if (type_map[nodes->at(i+1).type]==")") { + if (type=="(" && i+1size()) { - if (type_map[nodes->at(i+1).type]=="]") { + } else if (type=="[" && i+1size()) { - if (type_map[nodes->at(i+1).type]=="}") { + } else if (type=="{" && i+1 process_file_nodes(vector *nodes,string code) { auto it=delimiter0_lookup.find(insert); if (it!=delimiter0_lookup.end()) { size_t index=it->second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_delimiter0(index)); - print_debug("delimiter 0: "+insert); + generate_delimiter0(out,index); } else { if (insert!="{}" && insert!="\"") { auto it=delimiter1_lookup.find(insert); if (it!=delimiter1_lookup.end()) { size_t index=it->second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_delimiter1(index)); - print_debug("delimiter 1: "+insert); + generate_delimiter1(out,index); } else { - cout<<"Warning: unknow delimiter, that shouldn't happen: "< process_file_nodes(vector *nodes,string code) { auto it=delimiter1_lookup.find("{}"); if (it!=delimiter1_lookup.end()) { size_t index=it->second; - CCC_ADD_COMPONENT(out,generate_delimiter1(index)); - vector temp={0}; - CCC_ADD_COMPONENT_ALIGNED(out,temp); - print_debug("delimiter 1: "+insert); + out.align(); + out.write_bits(CCC_DELIMITER_1_HEAD,2); + out.write_bits(index,2); + out.write_bits(0b0,1); + out.align(); } else { - cout<<"Warning: unknow delimiter, that shouldn't happen: "<second; - CCC_ADD_COMPONENT(out,generate_delimiter1(index)); - vector temp={1}; - CCC_ADD_COMPONENT_ALIGNED(out,temp); - print_debug("delimiter 1: "+insert); + out.align(); + out.write_bits(CCC_DELIMITER_1_HEAD,2); + out.write_bits(index,2); + out.write_bits(0b1,1); + out.align(); } else { - cout<<"Warning: unknow delimiter, that shouldn't happen: "< process_file_nodes(vector *nodes,string code) { auto it=c_keyword_lookup.find(type); if (it!=c_keyword_lookup.end()) { size_t index=it->second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_c_keyword(index)); - print_debug("c keyword: "+type); + generate_c_keyword(out,index); } else { - cout<<"Warning: unknow C keyword, that shouldn't happen: "<second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_c_keyword(index)); - print_debug("c keyword: "+type); + generate_c_keyword(out,index); } else { - auto it=rec_lookup.find(text); + auto it=rec_lookup.find(string(text)); if (it==rec_lookup.end()) { if (!text.empty()) { - CCC_ADD_COMPONENT_ALIGNED(out,generate_string_content(text)); - print_debug("string for c keyword ("+type+"): "+text); + generate_string_content(out,text.data(),text.size()); } else { - cout<<"Warning: C keyword is empty: "<second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_rec(index,rec_list.size())); - print_debug("rec_table for string for c keyword ("+type+"): "+text); + generate_rec(out,index,rec_list.size()); } } } @@ -459,80 +555,114 @@ vector process_file_nodes(vector *nodes,string code) { auto it=miscelaneous_lookup.find(type); if (it!=miscelaneous_lookup.end()) { size_t index=it->second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_miscellaneous(index)); - print_debug("miscellaneous: "+type); + generate_miscellaneous(out,index); } else { - cout<<"Warning: unknow miscellaneous, that shouldn't happen: "<second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_rec(index,rec_list.size())); - print_debug("rec_table for comment"); + generate_rec(out,index,rec_list.size()); } } else { auto it=rec_lookup.find(type); if (it==rec_lookup.end()) { if (!text.empty()) { - CCC_ADD_COMPONENT_ALIGNED(out,generate_string_content(text)); - print_debug("string for unknow node ("+type+"): "+text); + generate_string_content(out,text.data(),text.size()); } else { - cout<<"Warning: unknow node is empty: "<second; - CCC_ADD_COMPONENT_ALIGNED(out,generate_rec(index,rec_list.size())); - print_debug("rec_table for string for unknow node ("+type+"): "+text); + generate_rec(out,index,rec_list.size()); } } } - vector payload_bytes; - unsigned char current=0; - size_t bit_index=0; - for (bool b:out) { - current|=(b<<(7-bit_index)); - bit_index++; - if (bit_index==8) { - payload_bytes.push_back(current); - current=0; - bit_index=0; - } - } - if (bit_index!=0) { - payload_bytes.push_back(current); - } - return payload_bytes; + out.align(); + return; } -void construct_rec_table(vector &files_content,vector files_names) { - for (int i=0;i=2 and s.first.size()>=3) { - rec_list.push_back(s.first); +void run_thread_encoding(size_t thread_num) { + auto start=chrono::high_resolution_clock::now(); + unordered_map useless_rec_map; + unordered_map thread_local_type_map; + unordered_map thread_local_type_u16_map; + vector thread_local_node_list; + TSParser *parser=ts_parser_new(); + ts_parser_set_language(parser,tree_sitter_c()); + int counter=0; + while (true) { + bit_streamer bitstream(0); + file_entry f; + { + lock_guard lock(encoding_queue_mutex); + if (encoding_files_queue.empty()) break; + f=std::move(encoding_files_queue.front()); + encoding_files_queue.pop(); } + bitstream.index=f.index; + ofstream temp_thread_out(".temp_ccc/temp_ccc_"+to_string(f.index)+".bin",ios::binary); + if (!temp_thread_out) { + cout<<"Error: couldn't open .temp_ccc/temp_ccc_"<(file)), istreambuf_iterator()); + file.close(); + thread_iterate_input_loop_call iterate_loop_settings { + .source_code=code, + .thread_local_node_list=thread_local_node_list, + .thread_local_type_map=thread_local_type_map, + .thread_local_type_u16_map=thread_local_type_u16_map, + .thread_local_next_type_id=0, + .thread_local_rec_map=useless_rec_map + }; + thread_encoding_input_loop_call encoding_loop_settings { + .source_code=code, + .node_list=thread_local_node_list, + .thread_local_type_map=thread_local_type_map, + .thread_local_bit_stream=bitstream + }; + TSTree *tree=ts_parser_parse_string(parser,nullptr,code.c_str(),code.size()); + TSNode root=ts_tree_root_node(tree); + iterate_loop_settings.source_code=code; + iterate_all_nodes_loop_call(iterate_loop_settings,root,iterating_mode::PARSING); + ts_tree_delete(tree); + encoding_loop_settings.source_code=code; + process_file_nodes_loop_call(encoding_loop_settings); + auto payload=std::move(bitstream.extract_buffer()); + temp_thread_out.write(reinterpret_cast(payload.data()),payload.size()); + temp_thread_out.close(); + payload.clear(); + payload.shrink_to_fit(); + vector().swap(thread_local_node_list); + thread_local_type_map.clear(); + thread_local_type_u16_map.clear(); + string().swap(code); + if (++counter%10==0 && enable_malloc_trim) malloc_trim(0); } - for (int i=0;i(end-start).count(); + cout<<"Parsing/encoding thread number "< files; for (int i=1;i files_content; for (int i=0;i> rec_map_futures; + for (size_t i=0;i all_rec_map_results; + map global_rec_map; + for (auto& fut:rec_map_futures) { + all_rec_map_results.push_back(fut.get()); + for (auto const& [str,count]:all_rec_map_results.back().thread_local_rec_map) { + global_rec_map[str]+=count; } - string code((istreambuf_iterator(file)),istreambuf_iterator()); - files_content.push_back(code); - cout< files_archive; - vector payloads_size; - vector payloads_start; - for (int i=0;i=2 && str.size()>=3) { + rec_list.push_back(str); + rec_lookup[str]=rec_list.size()-1; + } } + global_rec_map.clear(); + vector encoding_files_vec; + while (!encoding_files_queue.empty()) { + encoding_files_vec.push_back(std::move(encoding_files_queue.front())); + encoding_files_queue.pop(); + } + sort(encoding_files_vec.begin(),encoding_files_vec.end(),[](const file_entry& a,const file_entry& b) { + return a.size>b.size; + }); + for (auto& f:encoding_files_vec) { + encoding_files_queue.push(std::move(f)); + } + vector> encoding_futures; + for (size_t i=0;i global_payloads_start; + size_t current_offset=0; + for (int i=0;i final_payloads; + final_payloads.resize(current_offset); + size_t offset=0; + for (int i=0;i(final_payloads.data()+offset),current_file_size); + offset+=current_file_size; + ccc_file.close(); + } + fs::remove_all(".temp_ccc"); vector payload_compressed; - payload_compressed.resize(files_archive.size()+files_archive.size()/3+128); - lzma_stream strm=LZMA_STREAM_INIT; - if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) { - cout<<"Error: couldn't initialize LZMA compressor for file archive."<(end-start).count(); if (ret!=LZMA_STREAM_END) { - cout<<"Error: couldn't compress file archive."<=original_size) { flags&= ~(0b00000001); @@ -663,11 +888,16 @@ int main(int argc,char **argv) { files_table.push_back(c); } files_table.push_back('\0'); - auto file_start=payloads_start[i]; + auto file_start=global_payloads_start[i]; for (int i=0;i +int main() { + printf("hello"); + return 0; +} diff --git a/test.py b/test.py index 2871877..b921dde 100644 --- a/test.py +++ b/test.py @@ -3,17 +3,37 @@ import subprocess import time def get_source_files(root_dir): - """Récupère les fichiers et calcule la taille totale.""" source_files = [] total_size = 0 for root, _, files in os.walk(root_dir): for file in files: - if file.endswith(('.c', '.h')) and len(source_files)<20000: + if file.endswith(('.c', '.h')) and len(source_files) < 30000: path = os.path.join(root, file) source_files.append(path) total_size += os.path.getsize(path) return source_files, total_size + +def monitor_process(proc): + max_rss = 0 + + while proc.poll() is None: + try: + with open(f"/proc/{proc.pid}/status") as f: + for line in f: + if line.startswith("VmHWM:"): + rss = int(line.split()[1]) # kB + if rss > max_rss: + max_rss = rss + break + except FileNotFoundError: + break + + time.sleep(0.05) + + return max_rss + + def main(): target_dir = "linux" if not os.path.exists(target_dir): @@ -26,44 +46,53 @@ def main(): print(f"Fichiers trouvés : {len(files)}") print(f"Taille totale brute : {raw_mo:.2f} Mo") - # 1. Compression avec TAR + # 1. TAR print("\n--- Lancement de TAR -cJf (XZ) ---") start_tar = time.time() tar_cmd = ["tar", "-cJf", "linux_sources.tar.xz", "--files-from=-"] process_tar = subprocess.Popen(tar_cmd, stdin=subprocess.PIPE) - process_tar.communicate(input="\n".join(files).encode()) + + process_tar.stdin.write("\n".join(files).encode()) + process_tar.stdin.close() + + peak_tar = monitor_process(process_tar) + process_tar.wait() end_tar = time.time() - # 2. Compression avec CCC + # 2. CCC print("\n--- Lancement de CCC (Output temps réel) ---") print("-" * 40) start_ccc = time.time() try: - # On laisse stdout et stderr par défaut pour voir l'output de CCC - subprocess.run(["./ccc"] + files, check=True) - except subprocess.CalledProcessError as e: - print(f"\nErreur fatale CCC : {e}") + process_ccc = subprocess.Popen(["./ccc"] + files) + peak_ccc = monitor_process(process_ccc) + process_ccc.wait() except OSError as e: - print(f"\nErreur système (trop de fichiers ?) : {e}") + print(f"\nErreur système : {e}") return + end_ccc = time.time() print("-" * 40) - # 3. Calculs finaux - print("\n" + "="*40) + # Résultats + print("\n" + "=" * 40) print(f" RÉSULTATS (Source: {raw_mo:.2f} Mo)") - print("="*40) + print("=" * 40) for name, filename in [("TAR.XZ", "linux_sources.tar.xz"), ("CCC", "test.ccc")]: if os.path.exists(filename): size_mo = os.path.getsize(filename) / (1024 * 1024) ratio = (size_mo / raw_mo) * 100 - print(f"{name:10} : {size_mo:8.2f} Mo ({ratio:5.2f}% du total)") + print(f"{name:10} : {size_mo:8.2f} Mo ({ratio:5.2f}%)") else: print(f"{name:10} : Non généré") print(f"\nTemps TAR : {end_tar - start_tar:.2f}s") print(f"Temps CCC : {end_ccc - start_ccc:.2f}s") + print(f"\nPic RAM TAR : {peak_tar / 1024:.2f} Mo") + print(f"Pic RAM CCC : {peak_ccc / 1024:.2f} Mo") + + if __name__ == "__main__": main() diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..269c823 --- /dev/null +++ b/test.sh @@ -0,0 +1,9 @@ +python test.py & +pid=$! +max=0 +while kill -0 $pid 2>/dev/null; do + rss=$(awk '/VmRSS/ {print $2}' /proc/$pid/status) + (( rss > max )) && max=$rss + sleep 0.1 +done +echo "Max RSS = $max kB"