new optis

This commit is contained in:
2026-04-28 17:35:37 +02:00
commit fd8bb0ed19
15 changed files with 6514 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
ccc
linux/

BIN
all.tar.gz Normal file

Binary file not shown.

1
build.sh Executable file
View File

@@ -0,0 +1 @@
# Compiles ccc.cpp into the `ccc` executable, linking tree-sitter, the tree-sitter C grammar, liblzma and xxhash.
g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma -lxxhash -Ofast -march=native

992
ccc.cpp Normal file
View File

@@ -0,0 +1,992 @@
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <stdint.h>
#include <string>
#include <string_view>
#include <threads.h>
#include <vector>
#include <map>
#include <unordered_map>
#include <iterator>
#include <algorithm>
#include <thread>
#include <future>
#include <queue>
#include <chrono>
#include <tree_sitter/api.h>
#include <tree_sitter/tree-sitter-c.h>
#include <lzma.h>
#include <xxh3.h>
#include <malloc.h>
using namespace std;
namespace fs=filesystem;
// Prefix values that identify the kind of each encoded token.
// The generate_* writers emit them with the bit widths noted below.
// Written as 1 bit by generate_delimiter0.
const uint64_t CCC_DELIMITER_0_HEAD=0b0;
// Written as 2 bits by generate_delimiter1.
const uint64_t CCC_DELIMITER_1_HEAD=0b10;
// Written as 4 bits by generate_c_keyword.
const uint64_t CCC_C_KEYWORD_HEAD=0b1100;
// Written as 4 bits by generate_miscellaneous.
const uint64_t CCC_MISCELANEOUS_HEAD=0b1101;
// Written as 4 bits by generate_string_content, before the inline bytes.
const uint64_t CCC_STRING_INLINE_HEAD=0b1110;
// Written as 4 bits by generate_rec, before the recurrence-table index.
const uint64_t CCC_REC_TABLE_REF_HEAD=0b1111;
// Terminator written as 8 bits after the bytes of an inline string.
const uint64_t CCC_STRING_INLINE_END=0b00000000;
// Appends every element of `tail` to the end of `vec`.
// `tail` is bound by const reference so the source container is not copied
// first; as a consequence `tail` must not be `vec` itself.
#define CCC_ADD_COMPONENT(vec,tail) \
do { \
    const auto& ccc_tail_ref=(tail); \
    (vec).insert((vec).end(),ccc_tail_ref.begin(),ccc_tail_ref.end()); \
} while (0)
// Hash functor for std::string keys: hashes the string's bytes with
// XXH3_64bits and converts the result to size_t.
struct XXH3HasherString {
    size_t operator()(const std::string& s) const {
        const auto digest=XXH3_64bits(s.data(),s.size());
        return static_cast<size_t>(digest);
    }
};
// Accumulates bits, most significant bit first, into a growable byte buffer.
class bit_streamer {
private:
    std::vector<uint8_t> out;   // completed bytes
    uint8_t current_byte=0;     // byte currently being filled
    uint8_t bit_pos=0;          // number of bits already placed in current_byte (0..7)
public:
    // Caller-supplied tag; main() sorts the encoded streams by it.
    size_t index;
    bit_streamer(size_t index) : index(index) {
        out.reserve(1024*1024);
    }
    // Number of completed bytes. Bits pending in the partial byte are not
    // counted until align() or extract_buffer() flushes them.
    size_t get_size() const {
        return out.size();
    }
    // Appends the `count` low-order bits of `value`, highest of those bits first.
    void write_bits(uint64_t value,uint8_t count) {
        for (int i=count-1;i>=0;--i) {
            // `value` has no bits above position 63: treat them as zero
            // instead of shifting by an out-of-range amount.
            if (i<64 && ((value>>i) & 1)) {
                current_byte|=static_cast<uint8_t>(1u<<(7-bit_pos));
            }
            if (++bit_pos==8) {
                out.push_back(current_byte);
                current_byte=0;
                bit_pos=0;
            }
        }
    }
    // Flushes a partially filled byte (unused low bits stay zero); no-op when
    // the stream is already on a byte boundary.
    void align() {
        if (bit_pos==0) return;
        out.push_back(current_byte);
        current_byte=0;
        bit_pos=0;
    }
    // Read-only view of the completed bytes.
    const std::vector<uint8_t>& get_out() const {
        return out;
    }
    // Flushes pending bits and hands the buffer over to the caller; the
    // streamer's own buffer is left in a moved-from state.
    std::vector<uint8_t> extract_buffer() {
        align();
        return std::move(out);
    }
};
// Single-character delimiters. main() maps each string to its position in this
// list, and generate_delimiter0 writes that position in 3 bits, so the list
// can hold at most 8 entries and its order is part of the encoded format.
const vector<string> delimiter0={
"{",
"}",
"(",
")",
"[",
"]",
",",
"."
};
// Paired delimiters and ";". main() maps each string to its position in this
// list, and that position is written in 2 bits, so the list can hold at most
// 4 entries and its order is part of the encoded format.
const vector<string> delimiter1={
"{}",
"()",
"[]",
";"
};
// Operators plus a few common names. main() maps each string to its position
// in this list, and generate_miscellaneous writes that position in 6 bits, so
// the list can hold at most 64 entries and its order is part of the encoded
// format.
const vector<string> miscellaneous={
"!",
"%",
"'",
"*",
"+",
"-",
"/",
":",
"<",
">",
"=",
"?",
"^",
"|",
"&",
"~",
"+=",
"-=",
"*=",
"/=",
"%=",
"&=",
"|=",
"^=",
"<<=",
">>=",
"++",
"--",
"<<",
">>",
"==",
"!=",
"<=",
">=",
"->",
"...",
"||",
"&&",
"NULL",
"size_t",
"uint8_t",
"uint16_t",
"uint32_t",
"uint64_t",
"int8_t",
"int16_t",
"int32_t",
"int64_t"
};
// Preprocessor directives and C keywords. main() maps each string to its
// position in this list, and generate_c_keyword writes that position in
// 6 bits, so the list can hold at most 64 entries and its order is part of the
// encoded format. iterate_all_nodes_loop_call also scans this list to keep
// primitive_type tokens that are keywords out of the recurrence count.
const vector<string> c_keywords={
"#if",
"#ifdef",
"#ifndef",
"#else",
"#elif",
"#elifdef",
"#elifndef",
"#endif",
"#define",
"#undef",
"#include",
"#error",
"#warning",
"#pragma",
"#line",
"alignas",
"alignof",
"auto",
"bool",
"break",
"case",
"char",
"const",
"constexpr",
"continue",
"default",
"do",
"double",
"else",
"enum",
"extern",
"false",
"float",
"for",
"goto",
"if",
"inline",
"int",
"long",
"nullptr",
"register",
"restrict",
"return",
"short",
"signed",
"sizeof",
"static",
"static_assert",
"struct",
"switch",
"thread_local",
"true",
"typedef",
"typeof",
"typeof_unequal",
"union",
"unsigned",
"void",
"volatile",
"while",
"__asm__",
"__attribute__",
"defined",
};
// Archive header. Packed to 1-byte alignment; main() copies its raw bytes to
// the very start of the output file.
// NOTE(review): the size_t fields make the on-disk layout depend on the
// platform's size_t width and byte order — confirm this is acceptable.
#pragma pack(push,1)
struct header {
// Signature bytes; main() sets them to 'C','C','C'.
uint8_t sig[3];
// Bit flags set by main(): bit 0 = payload stored compressed, bit 1 =
// recurrence table stored compressed, bit 2 = files table stored compressed.
uint8_t flags;
// Stored size in bytes of the recurrence table (compressed size when bit 1 is set).
size_t size_rec_table;
// Number of input files.
size_t entry_count;
// Stored size in bytes of the payload (compressed size when bit 0 is set).
size_t size_payload;
};
#pragma pack(pop)
// One leaf of the syntax tree, recorded by iterate_all_nodes_loop_call.
struct node {
// Numeric id of the node's type name, as returned by get_id.
uint32_t type;
// Start byte offset of the node's text in the source file.
uint32_t start;
// End byte offset (exclusive: the text length is end-start).
uint32_t end;
};
// One input file travelling through the work queues.
struct file_entry {
// Path as given on the command line; also the key into filename_to_node_list.
string name;
// Full file contents.
string content;
// Size value used by main() to order the encoding queue (largest first).
size_t size;
// Position of the file in the command-line file list.
size_t index;
};
// Arguments shared by every recursive call of iterate_all_nodes_loop_call for one file.
struct thread_iterate_input_loop_call {
// Source text of the file being walked.
string &source_code;
// Receives one node per leaf of the syntax tree.
vector<node> &thread_local_node_list;
// Per-thread occurrence counts of candidate recurrence strings.
map<string,int>& thread_local_rec_map;
};
// Result returned by run_thread_rec_map: the occurrence counts collected by one worker.
struct thread_rec_map_result {
map<string,int> thread_local_rec_map;
};
// Arguments of process_file_nodes_loop_call for one file.
struct thread_encoding_input_loop_call {
// Source text of the file being encoded.
string &source_code;
// Leaf nodes of the file (held by value).
vector<node> node_list;
// Bit stream that receives the encoded tokens.
bit_streamer& thread_local_bit_stream;
};
// Result returned by run_thread_encoding: one bit stream per file encoded by that worker.
struct thread_encoding_result {
vector<bit_streamer> encoded_files;
};
// Files waiting for the parsing / recurrence-counting pass; accessed under rec_map_queue_mutex.
queue<file_entry> rec_map_files_queue;
mutex rec_map_queue_mutex;
// Files waiting for the encoding pass; accessed under encoding_queue_mutex by the workers.
queue<file_entry> encoding_files_queue;
mutex encoding_queue_mutex;
// Locked by run_thread_rec_map when it stores a file's node list in filename_to_node_list.
mutex filename_nodes_mutex;
// Recurrence table: strings selected by main() (count >= 2 and length >= 3).
vector<string> rec_list;
// String -> index in rec_list.
unordered_map<string,size_t> rec_lookup;
// String -> position in c_keywords / miscellaneous / delimiter0 / delimiter1; filled by main().
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> c_keyword_lookup;
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> miscelaneous_lookup;
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> delimiter0_lookup;
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> delimiter1_lookup;
// Node type name -> numeric id and the reverse table; both maintained by get_id.
unordered_map<string,uint32_t,XXH3HasherString> type_to_id;
vector<string> id_to_type;
// File name -> leaf nodes recorded for that file by run_thread_rec_map.
unordered_map<string,vector<node>,XXH3HasherString> filename_to_node_list;
// Set by -w: print_warning only prints when true.
bool show_warning=false;
// Set by -W: fail_if_warning exits the process when true.
bool fail_on_warning=false;
// Cleared by -f: when true the workers call malloc_trim every 20 files.
bool enable_malloc_trim=true;
// Held by get_id while it assigns a new type id.
mutex type_alloc;
// Returns the numeric id of a node type name, assigning the next free id
// (the current length of id_to_type) the first time a name is seen.
//
// The lock is taken before the first lookup: the parsing workers call this
// concurrently, and reading type_to_id while another thread inserts into it
// is a data race on the unordered_map.
uint32_t get_id(const string& type) {
    lock_guard<mutex> lock(type_alloc);
    const auto known=type_to_id.find(type);
    if (known!=type_to_id.end()) return known->second;
    const uint32_t id=static_cast<uint32_t>(id_to_type.size());
    type_to_id.emplace(type,id);
    id_to_type.push_back(type);
    return id;
}
// Cached get_id() results for the node type names the encoder tests against;
// main() assigns all of them before any worker thread starts.
uint32_t ID_STRING_CONTENT,ID_SYSTEM_LIB_STRING,ID_IDENTIFIER,ID_NUMBER_LITERAL,ID_TYPE_IDENTIFIER,ID_FIELD_IDENTIFIER,ID_ESCAPE_SEQUENCE,ID_STATEMENT_IDENTIFIER,ID_PRIMITIVE_TYPE,ID_COMMENT,ID_PREPROC_ARG,ID_CHARACTER,ID_PREPROC_DIRECTIVE;
// Same, for the bracket tokens "(" ")" "[" "]" "{" "}" and the double quote.
uint32_t ID_LEFT_PAR,ID_RIGHT_PAR,ID_LEFT_CROCHET,ID_RIGHT_CROCHET,ID_LEFT_ACC,ID_RIGHT_ACC,ID_QUOTE;
// Depth-first walk of the syntax tree below `current_node`.
// Every leaf (node without children) is appended to the node list, and leaves
// of selected types have their text counted in the per-thread recurrence map.
void iterate_all_nodes_loop_call(thread_iterate_input_loop_call &settings,TSNode current_node) {
    const uint32_t child_count=ts_node_child_count(current_node);
    if (child_count!=0) {
        // Inner node: recurse into each child in order.
        for (uint32_t idx=0;idx<child_count;++idx) {
            iterate_all_nodes_loop_call(settings,ts_node_child(current_node,idx));
        }
        return;
    }
    const uint32_t start=ts_node_start_byte(current_node);
    const uint32_t end=ts_node_end_byte(current_node);
    const string_view text{settings.source_code.data()+start,end-start};
    const string type(ts_node_type(current_node));
    // Leaf types whose text is always counted.
    static const char* const counted_types[]={
        "string_content","system_lib_string","identifier","number_literal",
        "type_identifier","field_identifier","escape_sequence","statement_identifier"
    };
    const bool always_counted=std::any_of(std::begin(counted_types),std::end(counted_types),
        [&type](const char* name) { return type==name; });
    if (always_counted) {
        settings.thread_local_rec_map[string(text)]++;
    }
    // Primitive types are counted only when they are not already a keyword.
    if (type=="primitive_type" && find(c_keywords.begin(),c_keywords.end(),text)==c_keywords.end()) {
        settings.thread_local_rec_map[string(text)]++;
    }
    // Comments get a fixed count of 2.
    if (type=="comment") {
        settings.thread_local_rec_map[string(text)]=2;
    }
    settings.thread_local_node_list.push_back({.type=get_id(type),.start=start,.end=end});
}
// Worker for the first pass. Repeatedly takes a file from rec_map_files_queue,
// parses it as C, records its leaf nodes in filename_to_node_list and counts
// candidate recurrence strings, then forwards the file to encoding_files_queue.
// Returns the counts gathered by this worker. `thread_num` is used only in the
// completion message.
//
// Parser set-up or parse failures terminate the process with an error message
// instead of dereferencing a null tree.
thread_rec_map_result run_thread_rec_map(size_t thread_num) {
    const auto start=chrono::high_resolution_clock::now();
    thread_rec_map_result res;
    TSParser *parser=ts_parser_new();
    // ts_parser_set_language reports failure through its return value; with no
    // language assigned, parsing would return a null tree.
    if (parser==nullptr || !ts_parser_set_language(parser,tree_sitter_c())) {
        cout<<"Error: couldn't initialize the C parser."<<endl;
        exit(-1);
    }
    int counter=0;
    while (true) {
        vector<node> node_vector={};
        file_entry f;
        {
            lock_guard<mutex> lock(rec_map_queue_mutex);
            if (rec_map_files_queue.empty()) break;
            f=std::move(rec_map_files_queue.front());
            rec_map_files_queue.pop();
        }
        // The parser length and the node offsets are 32-bit: refuse anything larger
        // rather than silently truncating.
        if (f.content.size()>UINT32_MAX) {
            cout<<"Error: file is too large to be parsed: "<<f.name<<endl;
            exit(-1);
        }
        thread_iterate_input_loop_call loop_settings {
            .source_code=f.content,
            .thread_local_node_list=node_vector,
            .thread_local_rec_map=res.thread_local_rec_map
        };
        TSTree *tree=ts_parser_parse_string(parser,nullptr,f.content.c_str(),static_cast<uint32_t>(f.content.size()));
        if (tree==nullptr) {
            cout<<"Error: couldn't parse file: "<<f.name<<endl;
            exit(-1);
        }
        iterate_all_nodes_loop_call(loop_settings,ts_tree_root_node(tree));
        ts_tree_delete(tree);
        {
            lock_guard<mutex> lock(filename_nodes_mutex);
            filename_to_node_list[f.name]=std::move(node_vector);
        }
        {
            lock_guard<mutex> lock(encoding_queue_mutex);
            encoding_files_queue.push(std::move(f));
        }
        // Periodically hand freed heap memory back to the OS unless fast mode disabled it.
        if (++counter%20==0 && enable_malloc_trim) malloc_trim(0);
    }
    ts_parser_delete(parser);
    const auto end=chrono::high_resolution_clock::now();
    const auto ms=chrono::duration_cast<chrono::milliseconds>(end-start).count();
    cout<<"Recccurences map thread number "<<thread_num<<" finished succesfully on "<<ms<<" milliseconds."<<endl;
    return res;
}
// Writes one byte-aligned keyword token: the 4-bit keyword head followed by
// the 6-bit position of the keyword in c_keywords.
void generate_c_keyword(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t head_bits=4;
    constexpr uint8_t index_bits=6;
    bitstream.align();
    bitstream.write_bits(CCC_C_KEYWORD_HEAD,head_bits);
    bitstream.write_bits(index,index_bits);
    bitstream.align();
}
// Writes one byte-aligned recurrence-table reference: the 4-bit reference head
// followed by `index`, using as many bits as it takes to shift `total_recs`
// down to zero.
void generate_rec(bit_streamer& bitstream,size_t index,size_t total_recs) {
    size_t width=0;
    for (size_t remaining=total_recs;remaining!=0;remaining>>=1) {
        ++width;
    }
    bitstream.align();
    bitstream.write_bits(CCC_REC_TABLE_REF_HEAD,4);
    bitstream.write_bits(index,width);
    bitstream.align();
}
// Writes one byte-aligned delimiter0 token: the 1-bit head followed by the
// 3-bit position of the delimiter in delimiter0.
void generate_delimiter0(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t head_bits=1;
    constexpr uint8_t index_bits=3;
    bitstream.align();
    bitstream.write_bits(CCC_DELIMITER_0_HEAD,head_bits);
    bitstream.write_bits(index,index_bits);
    bitstream.align();
}
// Writes one byte-aligned delimiter1 token: the 2-bit head followed by the
// 2-bit position of the delimiter in delimiter1.
void generate_delimiter1(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t head_bits=2;
    constexpr uint8_t index_bits=2;
    bitstream.align();
    bitstream.write_bits(CCC_DELIMITER_1_HEAD,head_bits);
    bitstream.write_bits(index,index_bits);
    bitstream.align();
}
// Writes one byte-aligned miscellaneous token: the 4-bit head followed by the
// 6-bit position of the entry in miscellaneous.
void generate_miscellaneous(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t head_bits=4;
    constexpr uint8_t index_bits=6;
    bitstream.align();
    bitstream.write_bits(CCC_MISCELANEOUS_HEAD,head_bits);
    bitstream.write_bits(index,index_bits);
    bitstream.align();
}
// Writes one byte-aligned inline string token: the 4-bit inline-string head,
// then each of the `text_len` bytes of `text` as 8 bits, then the 8-bit
// terminator CCC_STRING_INLINE_END.
void generate_string_content(bit_streamer& bitstream,const char *text,size_t text_len) {
    bitstream.align();
    bitstream.write_bits(CCC_STRING_INLINE_HEAD,4);
    // size_t index: matches the type of text_len (the previous int index mixed
    // signedness and could overflow on very long inputs).
    for (size_t i=0;i<text_len;++i) {
        bitstream.write_bits(static_cast<unsigned char>(text[i]),8);
    }
    bitstream.write_bits(CCC_STRING_INLINE_END,8);
    bitstream.align();
}
// Prints `text` followed by a newline on standard output, but only when
// warnings were enabled (show_warning).
void print_warning(string text) {
    if (!show_warning) return;
    cout<<text<<endl;
}
// Terminates the process with exit code -1 when fail_on_warning is set;
// otherwise does nothing.
void fail_if_warning() {
    if (!fail_on_warning) return;
    exit(-1);
}
void process_file_nodes_loop_call(thread_encoding_input_loop_call& settings) {
bit_streamer& out=settings.thread_local_bit_stream;
for (int i=0;i<settings.node_list.size();i++) {
node n=settings.node_list.at(i);
uint32_t &type=n.type;
string_view text{settings.source_code.data()+n.start,n.end-n.start};
if (type==ID_STRING_CONTENT || type==ID_SYSTEM_LIB_STRING || type==ID_IDENTIFIER || type==ID_NUMBER_LITERAL || type==ID_FIELD_IDENTIFIER || type==ID_PREPROC_ARG || type==ID_ESCAPE_SEQUENCE || type==ID_CHARACTER || type==ID_STATEMENT_IDENTIFIER) {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
generate_string_content(out,text.data(),text.size());
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
} else if (type==ID_PRIMITIVE_TYPE || type==ID_TYPE_IDENTIFIER) {
auto it=c_keyword_lookup.find(string(text));
if (it!=c_keyword_lookup.end()) {
size_t index=it->second;
generate_c_keyword(out,index);
} else {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: type node is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
}
} else if (delimiter0_lookup.find(id_to_type[type])!=delimiter0_lookup.end() || delimiter1_lookup.find(id_to_type[type])!=delimiter1_lookup.end() || type==ID_QUOTE) {
string insert;
if (type==ID_LEFT_PAR && i+1<settings.node_list.size()) {
if (settings.node_list[i+1].type==ID_RIGHT_PAR) {
insert="()";
i++;
} else {
insert="(";
}
} else if (type==ID_LEFT_CROCHET && i+1<settings.node_list.size()) {
if (settings.node_list[i+1].type==ID_RIGHT_CROCHET) {
insert="[]";
i++;
} else {
insert="[";
}
} else if (type==ID_LEFT_ACC && i+1<settings.node_list.size()) {
if (settings.node_list[i+1].type==ID_RIGHT_ACC) {
insert="{}";
i++;
} else {
insert="{";
}
} else {
insert=id_to_type[type];
}
auto it=delimiter0_lookup.find(insert);
if (it!=delimiter0_lookup.end()) {
size_t index=it->second;
generate_delimiter0(out,index);
} else {
if (insert!="{}" && insert!="\"") {
auto it=delimiter1_lookup.find(insert);
if (it!=delimiter1_lookup.end()) {
size_t index=it->second;
generate_delimiter1(out,index);
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
} else {
if (insert=="{}") {
auto it=delimiter1_lookup.find("{}");
if (it!=delimiter1_lookup.end()) {
size_t index=it->second;
out.align();
out.write_bits(CCC_DELIMITER_1_HEAD,2);
out.write_bits(index,2);
out.write_bits(0b0,1);
out.align();
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
} else if (insert=="\"") {
auto it=delimiter1_lookup.find("{}");
if (it!=delimiter1_lookup.end()) {
size_t index=it->second;
out.align();
out.write_bits(CCC_DELIMITER_1_HEAD,2);
out.write_bits(index,2);
out.write_bits(0b1,1);
out.align();
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
}
}
} else if (c_keyword_lookup.find(id_to_type[type])!=c_keyword_lookup.end() || type==ID_PREPROC_DIRECTIVE) {
if (type!=ID_PREPROC_DIRECTIVE) {
auto it=c_keyword_lookup.find(id_to_type[type]);
if (it!=c_keyword_lookup.end()) {
size_t index=it->second;
generate_c_keyword(out,index);
} else {
print_warning("Warning: unknow C keyword, that shouldn't happen: "+id_to_type[type]+" "+string(text));
fail_if_warning();
}
} else {
auto it=c_keyword_lookup.find(string(text));
if (it!=c_keyword_lookup.end()) {
size_t index=it->second;
generate_c_keyword(out,index);
} else {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: C keyword is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
}
}
} else if (miscelaneous_lookup.find(id_to_type[type])!=miscelaneous_lookup.end()) {
auto it=miscelaneous_lookup.find(id_to_type[type]);
if (it!=miscelaneous_lookup.end()) {
size_t index=it->second;
generate_miscellaneous(out,index);
} else {
print_warning("Warning: unknow miscellaneous, that shouldn't happen: "+id_to_type[type]);
fail_if_warning();
}
} else if (type==ID_COMMENT) {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: comment is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
} else {
auto it=rec_lookup.find(id_to_type[type]);
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: unknow node is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
}
}
out.align();
return;
}
// Worker for the second pass. Repeatedly takes a file from
// encoding_files_queue, encodes its recorded leaf nodes into a fresh
// bit_streamer tagged with the file's index, and returns all streams produced
// by this worker. `thread_num` is used only in the completion message.
thread_encoding_result run_thread_encoding(size_t thread_num) {
    const auto start=chrono::high_resolution_clock::now();
    thread_encoding_result res;
    int counter=0;
    while (true) {
        file_entry f;
        {
            lock_guard<mutex> lock(encoding_queue_mutex);
            if (encoding_files_queue.empty()) break;
            f=std::move(encoding_files_queue.front());
            encoding_files_queue.pop();
        }
        // Take the node list out of the shared map under its mutex: several
        // encoding workers access the map at the same time.
        vector<node> file_nodes;
        {
            lock_guard<mutex> lock(filename_nodes_mutex);
            const auto entry=filename_to_node_list.find(f.name);
            if (entry!=filename_to_node_list.end()) file_nodes=std::move(entry->second);
        }
        res.encoded_files.emplace_back(f.index);
        thread_encoding_input_loop_call encoding_loop_settings {
            .source_code=f.content,
            .node_list=std::move(file_nodes),
            .thread_local_bit_stream=res.encoded_files.back()
        };
        process_file_nodes_loop_call(encoding_loop_settings);
        // Release the per-file buffers right away.
        vector<node>().swap(encoding_loop_settings.node_list);
        string().swap(f.content);
        if (++counter%20==0 && enable_malloc_trim) malloc_trim(0);
    }
    const auto end=chrono::high_resolution_clock::now();
    const auto ms=chrono::duration_cast<chrono::milliseconds>(end-start).count();
    cout<<"Parsing/encoding thread number "<<thread_num<<" finished succesfully on "<<ms<<" milliseconds."<<endl;
    return res;
}
// Entry point: parses the command line, runs the two multi-threaded passes
// (recurrence counting, then encoding), compresses the payload, the recurrence
// table and the files table with LZMA when that makes them smaller, and writes
// header + recurrence table + files table + payload to "test.ccc".
// Returns 0 on success and -1 on any error.
int main(int argc,char **argv) {
    // Register the node type names the encoder compares against.
    ID_STRING_CONTENT=get_id("string_content");
    ID_SYSTEM_LIB_STRING=get_id("system_lib_string");
    ID_IDENTIFIER=get_id("identifier");
    ID_NUMBER_LITERAL=get_id("number_literal");
    ID_TYPE_IDENTIFIER=get_id("type_identifier");
    ID_FIELD_IDENTIFIER=get_id("field_identifier");
    ID_ESCAPE_SEQUENCE=get_id("escape_sequence");
    ID_STATEMENT_IDENTIFIER=get_id("statement_identifier");
    ID_PRIMITIVE_TYPE=get_id("primitive_type");
    ID_COMMENT=get_id("comment");
    ID_PREPROC_ARG=get_id("preproc_arg");
    ID_CHARACTER=get_id("character");
    ID_LEFT_PAR=get_id("(");
    ID_RIGHT_PAR=get_id(")");
    ID_LEFT_CROCHET=get_id("[");
    ID_RIGHT_CROCHET=get_id("]");
    ID_LEFT_ACC=get_id("{");
    ID_RIGHT_ACC=get_id("}");
    ID_PREPROC_DIRECTIVE=get_id("preproc_directive");
    ID_QUOTE=get_id("\"");
    // Build the string -> table position lookups.
    for (size_t i=0;i<c_keywords.size();i++) {
        c_keyword_lookup[c_keywords[i]]=i;
    }
    for (size_t i=0;i<miscellaneous.size();i++) {
        miscelaneous_lookup[miscellaneous[i]]=i;
    }
    for (size_t i=0;i<delimiter0.size();i++) {
        delimiter0_lookup[delimiter0[i]]=i;
    }
    for (size_t i=0;i<delimiter1.size();i++) {
        delimiter1_lookup[delimiter1[i]]=i;
    }
    if (argc<2) {
        cout<<"Usage: ccc [FILES]"<<endl;
        return -1;
    }
    size_t compression_ratio=6;
    vector<string> files;
    for (int i=1;i<argc;i++) {
        string file=string(argv[i]);
        if (file=="-W") {
            fail_on_warning=true;
            continue;
        }
        if (file=="-w") {
            show_warning=true;
            continue;
        }
        if (file.substr(0,2)=="-c" && file.size()==3) {
            try {
                compression_ratio=stoi(file.substr(2,1));
                continue;
            } catch (const exception& e) {
                cout<<"Error: invalid argument: "<<file<<endl;
                return -1;
            }
        }
        if (file=="-f") {
            enable_malloc_trim=false;
            continue;
        }
        if (file=="-h" || file=="--help") {
            cout<<"C Code Compressor v0.1"<<endl;
            cout<<"Usage: ccc [-hfwW] [FILES]"<<endl;
            cout<<"Options:"<<endl;
            cout<<" -h : show this help message"<<endl;
            cout<<" -f : enable fast mode, reduce the total compression time but does not release unused"<<endl;
            cout<<" unused heap memory back to the OS. Usage of this option can raise memory usage."<<endl;
            cout<<" -w : show warning messages. For example, when a unknown or empty node is detected."<<endl;
            cout<<" -W : crash on warning"<<endl;
            cout<<" -c0..9: set the compression ratio for LZMA multithreaded compression phase. Default is 6."<<endl;
            cout<<" Below level 6, CCC may not compress better than tar.xz."<<endl;
            cout<<" Warning: setting this higher than -c6 will seriously raise memory usage."<<endl;
            cout<<" For exemple, using -c9 more than double memory usage in comparison with"<<endl;
            cout<<" -c6 (which is the default). "<<endl;
            cout<<" Warning: usage of higher options than -c6 combined with -f is heavily not"<<endl;
            cout<<" recommended."<<endl;
            return 0;
        }
        if (!fs::exists(file)) {
            cout<<"Error: file doesn't exist: "<<file<<endl;
            return -1;
        }
        files.push_back(file);
    }
    // Only options were given: there is nothing to archive.
    if (files.empty()) {
        cout<<"Usage: ccc [FILES]"<<endl;
        return -1;
    }
    for (size_t i=0;i<files.size();i++) {
        ifstream file(files[i],ios::binary);
        if (!file) {
            cout<<"Error: couldn't open provided file."<<endl;
            return -1;
        }
        string code((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
        // Read the size before the string is moved from; otherwise every entry
        // records size 0 and the size-based ordering below has no effect.
        const size_t code_size=code.size();
        file_entry f{files[i],std::move(code),code_size,i};
        rec_map_files_queue.push(std::move(f));
    }
    // hardware_concurrency() may return 0 when the count is unknown; always use
    // at least one worker (this also avoids a division by zero below).
    const size_t nb_threads=max<size_t>(1,thread::hardware_concurrency());
    //
    // Pass 1: parsing and recurrence counting
    //
    vector<future<thread_rec_map_result>> rec_map_futures;
    for (size_t i=0;i<nb_threads;++i) {
        rec_map_futures.push_back(async(launch::async,run_thread_rec_map,i+1));
    }
    vector<thread_rec_map_result> all_rec_map_results;
    map<string,int> global_rec_map;
    for (auto& fut:rec_map_futures) {
        all_rec_map_results.push_back(fut.get());
        for (auto const& [str,count]:all_rec_map_results.back().thread_local_rec_map) {
            global_rec_map[str]+=count;
        }
    }
    // Keep strings seen at least twice and at least 3 bytes long.
    for (auto const& [str,count]:global_rec_map) {
        if (count>=2 && str.size()>=3) {
            rec_list.push_back(str);
            rec_lookup[str]=rec_list.size()-1;
        }
    }
    global_rec_map.clear();
    // Re-queue the files largest first for the encoding pass.
    vector<file_entry> encoding_files_vec;
    while (!encoding_files_queue.empty()) {
        encoding_files_vec.push_back(std::move(encoding_files_queue.front()));
        encoding_files_queue.pop();
    }
    sort(encoding_files_vec.begin(),encoding_files_vec.end(),[](const file_entry& a,const file_entry& b) {
        return a.size>b.size;
    });
    for (auto& f:encoding_files_vec) {
        encoding_files_queue.push(std::move(f));
    }
    //
    // Pass 2: encoding
    //
    vector<future<thread_encoding_result>> encoding_futures;
    for (size_t i=0;i<all_rec_map_results.size();++i) {
        encoding_futures.push_back(async(launch::async,run_thread_encoding,i+1));
    }
    all_rec_map_results.clear();
    vector<thread_encoding_result> all_encoding_results;
    for (auto& fut:encoding_futures) {
        all_encoding_results.push_back(fut.get());
    }
    vector<bit_streamer> globals_bit_stream;
    for (auto& res:all_encoding_results) {
        globals_bit_stream.insert(globals_bit_stream.end(),res.encoded_files.begin(),res.encoded_files.end());
    }
    // Restore command-line order.
    sort(globals_bit_stream.begin(),globals_bit_stream.end(),[](const bit_streamer& a,const bit_streamer& b) {
        return a.index<b.index;
    });
    vector<unsigned char> final_payloads;
    vector<size_t> global_payloads_start;
    size_t total_payload_bytes=0;
    for (auto& bstr:globals_bit_stream) total_payload_bytes+=bstr.get_size();
    final_payloads.reserve(total_payload_bytes);
    size_t current_offset=0;
    for (auto& bstr:globals_bit_stream) {
        global_payloads_start.push_back(current_offset);
        auto encoded_file=bstr.extract_buffer();
        final_payloads.insert(final_payloads.end(),encoded_file.begin(),encoded_file.end());
        current_offset+=encoded_file.size();
    }
    // Remember the uncompressed payload size: final_payloads may be released
    // below, but the files table still needs it for the last entry's size.
    const size_t payload_uncompressed_size=final_payloads.size();
    //
    // Payload compression
    //
    vector<unsigned char> payload_compressed;
    cout<<"Files Payloads (in bytes): "<<final_payloads.size()<<endl;
    payload_compressed.resize(final_payloads.size()+final_payloads.size()/3+128);
    lzma_mt mt_options={};
    mt_options.flags=0;
    mt_options.threads=static_cast<uint32_t>(nb_threads);
    mt_options.block_size=max((size_t)8*1024*1024,final_payloads.size()/mt_options.threads);
    mt_options.timeout=0;
    mt_options.filters=nullptr;
    mt_options.check=LZMA_CHECK_CRC64;
    lzma_options_lzma opt_lzma;
    if (lzma_lzma_preset(&opt_lzma,compression_ratio)) {
        cout<<"Error: couldn't initialize LZMA compressor for files archive."<<endl;
        return -1;
    }
    lzma_filter filters[2];
    filters[0].id=LZMA_FILTER_LZMA2;
    filters[0].options=&opt_lzma;
    filters[1].id=LZMA_VLI_UNKNOWN;
    mt_options.filters=filters;
    lzma_stream strm=LZMA_STREAM_INIT;
    auto ret=lzma_stream_encoder_mt(&strm,&mt_options);
    if (ret!=LZMA_OK) {
        cout<<"Error: couldn't initialize MT compressor for files archives."<<endl;
        return -1;
    }
    strm.next_in=final_payloads.data();
    strm.avail_in=final_payloads.size();
    strm.next_out=payload_compressed.data();
    strm.avail_out=payload_compressed.size();
    ret=lzma_code(&strm,LZMA_FINISH);
    if (ret!=LZMA_STREAM_END) {
        cout<<"Error: couldn't compress files archive."<<endl;
        return -1;
    }
    size_t payload_total_size;
    size_t compressed_size=payload_compressed.size()-strm.avail_out;
    payload_compressed.resize(compressed_size);
    cout<<"Files Payload compressed (in bytes): "<<compressed_size<<endl;
    size_t original_size=final_payloads.size();
    uint8_t flags=0;
    // Keep whichever form is smaller and release the other one.
    if (compressed_size>=original_size) {
        flags&= ~(0b00000001);
        payload_total_size=original_size;
        vector<unsigned char>().swap(payload_compressed);
    } else {
        flags|=0b00000001;
        payload_total_size=compressed_size;
        vector<unsigned char>().swap(final_payloads);
    }
    lzma_end(&strm);
    //
    // Rec table compression
    //
    vector<unsigned char> rec_table;
    for (size_t i=0;i<rec_list.size();i++) {
        for (auto c:rec_list[i]) {
            rec_table.push_back(c);
        }
        rec_table.push_back('\0');
    }
    vector<unsigned char> rec_table_compressed;
    cout<<"Reccurences table (in bytes): "<<rec_table.size()<<endl;
    rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
    mt_options={};
    mt_options.flags=0;
    mt_options.threads=static_cast<uint32_t>(nb_threads);
    mt_options.block_size=max((size_t)8*1024*1024,rec_table.size()/mt_options.threads);
    mt_options.timeout=0;
    mt_options.filters=nullptr;
    mt_options.check=LZMA_CHECK_CRC64;
    lzma_options_lzma opt_lzma2;
    if (lzma_lzma_preset(&opt_lzma2,compression_ratio)) {
        cout<<"Error: couldn't initialize LZMA compressor for reccurences table."<<endl;
        return -1;
    }
    lzma_filter filters2[2];
    filters2[0].id=LZMA_FILTER_LZMA2;
    filters2[0].options=&opt_lzma2;
    filters2[1].id=LZMA_VLI_UNKNOWN;
    mt_options.filters=filters2;
    lzma_stream strm2=LZMA_STREAM_INIT;
    ret=lzma_stream_encoder_mt(&strm2,&mt_options);
    if (ret!=LZMA_OK) {
        cout<<"Error: couldn't initialize MT compressor for reccurences table."<<endl;
        return -1;
    }
    strm2.next_in=rec_table.data();
    strm2.avail_in=rec_table.size();
    strm2.next_out=rec_table_compressed.data();
    strm2.avail_out=rec_table_compressed.size();
    ret=lzma_code(&strm2,LZMA_FINISH);
    if (ret!=LZMA_STREAM_END) {
        cout<<"Error: couldn't compress reccurences table."<<endl;
        return -1;
    }
    size_t rec_table_total_size;
    compressed_size=rec_table_compressed.size()-strm2.avail_out;
    rec_table_compressed.resize(compressed_size);
    cout<<"Reccurences table compressed (in bytes): "<<compressed_size<<endl;
    original_size=rec_table.size();
    if (compressed_size>=original_size) {
        flags&= ~(0b00000010);
        rec_table_total_size=original_size;
        vector<unsigned char>().swap(rec_table_compressed);
    } else {
        flags|=0b00000010;
        rec_table_total_size=compressed_size;
        vector<unsigned char>().swap(rec_table);
    }
    lzma_end(&strm2);
    //
    // Files table
    //
    // One record per file: NUL-terminated name, then the raw bytes of the
    // payload start offset and of the payload size (both size_t).
    vector<unsigned char> files_table;
    const auto append_raw_size=[&files_table](size_t value) {
        const auto *bytes=reinterpret_cast<const unsigned char*>(&value);
        files_table.insert(files_table.end(),bytes,bytes+sizeof(value));
    };
    for (size_t i=0;i<files.size();i++) {
        for (auto c:files[i]) {
            files_table.push_back(c);
        }
        files_table.push_back('\0');
        const size_t file_start=global_payloads_start[i];
        // The last file ends at the end of the uncompressed payload.
        const size_t file_end=(i+1<files.size()) ? global_payloads_start[i+1] : payload_uncompressed_size;
        append_raw_size(file_start);
        append_raw_size(file_end-file_start);
    }
    cout<<"Files table (in bytes): "<<files_table.size()<<endl;
    vector<unsigned char> files_table_compressed;
    files_table_compressed.resize(files_table.size()+files_table.size()/3+128);
    strm=LZMA_STREAM_INIT;
    if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
        cout<<"Error: couldn't initialize LZMA compressor for files table."<<endl;
        return -1;
    }
    strm.next_in=files_table.data();
    strm.avail_in=files_table.size();
    strm.next_out=files_table_compressed.data();
    strm.avail_out=files_table_compressed.size();
    ret=lzma_code(&strm,LZMA_FINISH);
    if (ret!=LZMA_STREAM_END) {
        cout<<"Error: couldn't compress files table."<<endl;
        return -1;
    }
    compressed_size=files_table_compressed.size()-strm.avail_out;
    files_table_compressed.resize(compressed_size);
    cout<<"Files table compressed (in bytes): "<<compressed_size<<endl;
    original_size=files_table.size();
    lzma_end(&strm);
    // Release the form that is NOT written, so the buffer selected by the flag
    // below still holds its data.
    if (compressed_size>=original_size) {
        flags&= ~(0b00000100);
        vector<unsigned char>().swap(files_table_compressed);
    } else {
        flags|=0b00000100;
        vector<unsigned char>().swap(files_table);
    }
    //
    // Output
    //
    header head;
    head.sig[0]='C';
    head.sig[1]='C';
    head.sig[2]='C';
    head.flags=flags;
    head.size_payload=payload_total_size;
    head.size_rec_table=rec_table_total_size;
    head.entry_count=files.size();
    vector<unsigned char> out;
    const auto *head_bytes=reinterpret_cast<const unsigned char*>(&head);
    out.insert(out.end(),head_bytes,head_bytes+sizeof(header));
    if (flags & 0b00000010) {
        CCC_ADD_COMPONENT(out,rec_table_compressed);
    } else {
        CCC_ADD_COMPONENT(out,rec_table);
    }
    if (flags & 0b00000100) {
        CCC_ADD_COMPONENT(out,files_table_compressed);
    } else {
        CCC_ADD_COMPONENT(out,files_table);
    }
    if (flags & 0b00000001) {
        CCC_ADD_COMPONENT(out,payload_compressed);
    } else {
        CCC_ADD_COMPONENT(out,final_payloads);
    }
    ofstream fileout("test.ccc",ios::binary);
    if (!fileout) {
        cout<<"Error: couldn't open output file."<<endl;
        return -1;
    }
    fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
    fileout.close();
    cout<<"Finished !"<<endl;
    return 0;
}

950
ccc.cpp.save Normal file
View File

@@ -0,0 +1,950 @@
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <exception>
#include <iostream>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <stdint.h>
#include <string>
#include <string_view>
#include <threads.h>
#include <vector>
#include <map>
#include <unordered_map>
#include <iterator>
#include <algorithm>
#include <thread>
#include <future>
#include <queue>
#include <chrono>
#include <tree_sitter/api.h>
#include <tree_sitter/tree-sitter-c.h>
#include <lzma.h>
#include <xxh3.h>
#include <malloc.h>
using namespace std;
namespace fs=filesystem;
// Prefix values identifying the kind of each encoded token.
// NOTE(review): the code that writes these is not visible in this part of the
// file — confirm the bit width used for each head against the writers.
const uint64_t CCC_DELIMITER_0_HEAD=0b0;
const uint64_t CCC_DELIMITER_1_HEAD=0b10;
const uint64_t CCC_C_KEYWORD_HEAD=0b1100;
const uint64_t CCC_MISCELANEOUS_HEAD=0b1101;
const uint64_t CCC_STRING_INLINE_HEAD=0b1110;
const uint64_t CCC_REC_TABLE_REF_HEAD=0b1111;
// Presumably the terminator of an inline string — TODO confirm.
const uint64_t CCC_STRING_INLINE_END=0b00000000;
// Appends every element of `tail` to the end of `vec`.
// `tail` is bound by const reference so the source container is not copied
// first; as a consequence `tail` must not be `vec` itself.
#define CCC_ADD_COMPONENT(vec,tail) \
do { \
    const auto& ccc_tail_ref=(tail); \
    (vec).insert((vec).end(),ccc_tail_ref.begin(),ccc_tail_ref.end()); \
} while (0)
// Hash functor for std::string keys: hashes the string's bytes with
// XXH3_64bits and converts the result to size_t.
struct XXH3HasherString {
    size_t operator()(const std::string& s) const {
        const auto digest=XXH3_64bits(s.data(),s.size());
        return static_cast<size_t>(digest);
    }
};
// Accumulates bits, most significant bit first, into a growable byte buffer.
class bit_streamer {
private:
    std::vector<uint8_t> out;   // completed bytes
    uint8_t current_byte=0;     // byte currently being filled
    uint8_t bit_pos=0;          // number of bits already placed in current_byte (0..7)
public:
    // Caller-supplied tag, stored as given.
    size_t index;
    bit_streamer(size_t index) : index(index) {
        out.reserve(1024*1024);
    }
    // Number of completed bytes. Bits pending in the partial byte are not
    // counted until align() or extract_buffer() flushes them.
    size_t get_size() const {
        return out.size();
    }
    // Appends the `count` low-order bits of `value`, highest of those bits first.
    void write_bits(uint64_t value,uint8_t count) {
        for (int i=count-1;i>=0;--i) {
            // `value` has no bits above position 63: treat them as zero
            // instead of shifting by an out-of-range amount.
            if (i<64 && ((value>>i) & 1)) {
                current_byte|=static_cast<uint8_t>(1u<<(7-bit_pos));
            }
            if (++bit_pos==8) {
                out.push_back(current_byte);
                current_byte=0;
                bit_pos=0;
            }
        }
    }
    // Flushes a partially filled byte (unused low bits stay zero); no-op when
    // the stream is already on a byte boundary.
    void align() {
        if (bit_pos==0) return;
        out.push_back(current_byte);
        current_byte=0;
        bit_pos=0;
    }
    // Read-only view of the completed bytes.
    const std::vector<uint8_t>& get_out() const {
        return out;
    }
    // Flushes pending bits and hands the buffer over to the caller; the
    // streamer's own buffer is left in a moved-from state.
    std::vector<uint8_t> extract_buffer() {
        align();
        return std::move(out);
    }
};
// Single-character delimiter strings.
// NOTE(review): presumably referenced by position when encoding, which would
// make the order part of the format — confirm against the encoder.
const vector<string> delimiter0={
"{",
"}",
"(",
")",
"[",
"]",
",",
"."
};
// Paired delimiter strings and ";".
// NOTE(review): presumably referenced by position when encoding, which would
// make the order part of the format — confirm against the encoder.
const vector<string> delimiter1={
"{}",
"()",
"[]",
";"
};
// Operator strings plus a few common names (NULL, size_t, fixed-width integer types).
// NOTE(review): presumably referenced by position when encoding, which would
// make the order part of the format — confirm against the encoder.
const vector<string> miscellaneous={
"!",
"%",
"'",
"*",
"+",
"-",
"/",
":",
"<",
">",
"=",
"?",
"^",
"|",
"&",
"~",
"+=",
"-=",
"*=",
"/=",
"%=",
"&=",
"|=",
"^=",
"<<=",
">>=",
"++",
"--",
"<<",
">>",
"==",
"!=",
"<=",
">=",
"->",
"...",
"||",
"&&",
"NULL",
"size_t",
"uint8_t",
"uint16_t",
"uint32_t",
"uint64_t",
"int8_t",
"int16_t",
"int32_t",
"int64_t"
};
// Preprocessor directives and C keywords. iterate_all_nodes_loop_call scans
// this list to keep primitive_type tokens that are keywords out of the
// recurrence count.
// NOTE(review): presumably also referenced by position when encoding, which
// would make the order part of the format — confirm against the encoder.
const vector<string> c_keywords={
"#if",
"#ifdef",
"#ifndef",
"#else",
"#elif",
"#elifdef",
"#elifndef",
"#endif",
"#define",
"#undef",
"#include",
"#error",
"#warning",
"#pragma",
"#line",
"alignas",
"alignof",
"auto",
"bool",
"break",
"case",
"char",
"const",
"constexpr",
"continue",
"default",
"do",
"double",
"else",
"enum",
"extern",
"false",
"float",
"for",
"goto",
"if",
"inline",
"int",
"long",
"nullptr",
"register",
"restrict",
"return",
"short",
"signed",
"sizeof",
"static",
"static_assert",
"struct",
"switch",
"thread_local",
"true",
"typedef",
"typeof",
"typeof_unequal",
"union",
"unsigned",
"void",
"volatile",
"while",
"__asm__",
"__attribute__",
"defined",
};
// Archive header, packed to 1-byte alignment.
// NOTE(review): the code that fills and writes it is not visible in this part
// of the file; the field meanings below are inferred from the names — confirm.
#pragma pack(push,1)
struct header {
// Signature bytes.
uint8_t sig[3];
// Bit flags.
uint8_t flags;
// Presumably the stored size of the recurrence table in bytes.
size_t size_rec_table;
// Presumably the number of archived files.
size_t entry_count;
// Presumably the stored size of the payload in bytes.
size_t size_payload;
};
#pragma pack(pop)
// One leaf of the syntax tree, recorded by iterate_all_nodes_loop_call.
struct node {
// Per-thread numeric id of the node's type name (see thread_local_type_u16_map).
uint16_t type;
// Start byte offset of the node's text in the source file.
uint32_t start;
// End byte offset (the text length is end-start).
uint32_t end;
};
// One input file travelling through the work queues.
struct file_entry {
string name;
string content;
size_t size;
size_t index;
};
// Arguments shared by the recursive calls of iterate_all_nodes_loop_call.
struct thread_iterate_input_loop_call {
// Source text of the file being walked.
string &source_code;
// In PARSING mode, receives one node per leaf.
vector<node> &thread_local_node_list;
// Type id -> type name, filled in PARSING mode when a new type is first seen.
unordered_map<uint16_t,string>& thread_local_type_map;
// Type name -> type id, filled in PARSING mode when a new type is first seen.
unordered_map<string,uint16_t,XXH3HasherString>& thread_local_type_u16_map;
// Id handed to the next unseen type name; incremented after each assignment.
uint16_t thread_local_next_type_id;
// In REC_MAP mode, occurrence counts of candidate recurrence strings.
map<string,int> thread_local_rec_map;
};
// Holds one worker's occurrence counts.
struct thread_rec_map_result {
map<string,int> thread_local_rec_map;
};
// Arguments of the per-file encoding step (not visible in this part of the file).
struct thread_encoding_input_loop_call {
string &source_code;
vector<node> &node_list;
unordered_map<uint16_t,string>& thread_local_type_map;
bit_streamer& thread_local_bit_stream;
};
// Holds the bit streams produced by one encoding worker.
struct thread_encoding_result {
vector<bit_streamer> encoded_files;
};
// Selects what iterate_all_nodes_loop_call() does with each leaf of the syntax tree.
enum iterating_mode {
REC_MAP, // count token texts into the recurrences map
PARSING // record the leaf (type id, start, end) into the node list
};
// Files waiting for the counting pass; filled by main(), drained by run_thread_rec_map().
queue<file_entry> rec_map_files_queue;
// Guards rec_map_files_queue.
mutex rec_map_queue_mutex;
// Files waiting for the encoding pass; fed by run_thread_rec_map() once a file has been
// scanned, re-ordered by main(), then drained by run_thread_encoding().
queue<file_entry> encoding_files_queue;
// Guards encoding_files_queue.
mutex encoding_queue_mutex;
// Recurrences table: token texts that main() decided to store once and reference by index.
vector<string> rec_list;
// Token text -> index in rec_list.
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> rec_lookup;
// Token -> index in the matching fixed table; all four are filled at the start of main().
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> c_keyword_lookup;
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> miscelaneous_lookup;
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> delimiter0_lookup;
unordered_map<string,size_t,XXH3HasherString,std::equal_to<>> delimiter1_lookup;
// Set by -w: print_warning() only prints when this is true.
bool show_warning=false;
// Set by -W: fail_if_warning() exits the process when this is true.
bool fail_on_warning=false;
// Cleared by -f: when true the worker threads call malloc_trim(0) every 20 files.
bool enable_malloc_trim=true;
// Walks the syntax tree rooted at `current_node` depth-first and handles every leaf
// (node without children) according to `mode`:
//  - REC_MAP: counts the text of identifier-like leaves in settings.thread_local_rec_map.
//    Comments are forced to a count of 2, and primitive types are only counted when they
//    are not already covered by the fixed keyword table.
//  - PARSING: appends {type id, start byte, end byte} to settings.thread_local_node_list,
//    handing out a new type id the first time a type name is met.
// Relies on c_keyword_lookup having been filled (main() does it before starting any worker).
void iterate_all_nodes_loop_call(thread_iterate_input_loop_call &settings,TSNode current_node,iterating_mode mode) {
    const uint32_t child_count=ts_node_child_count(current_node);
    if (child_count!=0) {
        for (uint32_t i=0;i<child_count;++i) {
            iterate_all_nodes_loop_call(settings,ts_node_child(current_node,i),mode);
        }
        return;
    }
    const uint32_t start=ts_node_start_byte(current_node);
    const uint32_t end=ts_node_end_byte(current_node);
    const string_view text{settings.source_code.data()+start,end-start};
    const string type=ts_node_type(current_node);
    if (mode==iterating_mode::REC_MAP) {
        if (type=="comment") {
            // A comment is always worth a table entry: give it the minimum count main() requires.
            settings.thread_local_rec_map[string(text)]=2;
        } else if (type=="primitive_type") {
            // Hash lookup instead of a linear scan of c_keywords for every primitive type leaf.
            if (c_keyword_lookup.find(string(text))==c_keyword_lookup.end()) {
                settings.thread_local_rec_map[string(text)]++;
            }
        } else if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="type_identifier" || type=="field_identifier" || type=="escape_sequence" || type=="statement_identifier") {
            settings.thread_local_rec_map[string(text)]++;
        }
    } else if (mode==iterating_mode::PARSING) {
        // One lookup both finds an existing id and registers a new one.
        const auto [slot,inserted]=settings.thread_local_type_u16_map.try_emplace(type,settings.thread_local_next_type_id);
        if (inserted) {
            settings.thread_local_type_map[slot->second]=type;
            settings.thread_local_next_type_id++;
        }
        settings.thread_local_node_list.push_back({.type=slot->second,.start=start,.end=end});
    }
}
// Worker of the counting pass. Pops files from rec_map_files_queue until it is empty, parses
// each one with tree-sitter, lets iterate_all_nodes_loop_call() count its tokens in REC_MAP
// mode, then hands the file over to encoding_files_queue.
// `thread_num` is only used in the final log line.
// NOTE(review): the counts are accumulated through loop_settings.thread_local_rec_map; they
// only reach `res` if that member of thread_iterate_input_loop_call is a reference — confirm.
thread_rec_map_result run_thread_rec_map(size_t thread_num) {
    const auto start=chrono::high_resolution_clock::now();
    thread_rec_map_result res;
    // REC_MAP mode never touches these three, but the settings struct needs objects to bind to.
    unordered_map<string,uint16_t,XXH3HasherString> useless_type_u16_map;
    vector<node> useless_node_vector;
    unordered_map<uint16_t,string> useless_type_map;
    TSParser *parser=ts_parser_new();
    ts_parser_set_language(parser,tree_sitter_c());
    int counter=0;
    while (true) {
        file_entry f;
        {
            lock_guard<mutex> lock(rec_map_queue_mutex);
            if (rec_map_files_queue.empty()) break;
            f=std::move(rec_map_files_queue.front());
            rec_map_files_queue.pop();
        }
        thread_iterate_input_loop_call loop_settings {
            .source_code=f.content,
            .thread_local_node_list=useless_node_vector,
            .thread_local_type_map=useless_type_map,
            .thread_local_type_u16_map=useless_type_u16_map,
            .thread_local_next_type_id=0,
            .thread_local_rec_map=res.thread_local_rec_map
        };
        TSTree *tree=ts_parser_parse_string(parser,nullptr,f.content.c_str(),f.content.size());
        if (tree==nullptr) {
            // The parser yields no tree when it has no usable language (or was interrupted);
            // asking for the root node of a null tree would crash.
            cout<<"Error: couldn't parse file: "<<f.name<<endl;
            exit(-1);
        }
        iterate_all_nodes_loop_call(loop_settings,ts_tree_root_node(tree),iterating_mode::REC_MAP);
        ts_tree_delete(tree);
        {
            lock_guard<mutex> lock(encoding_queue_mutex);
            encoding_files_queue.push(std::move(f));
        }
        // Periodically hand freed heap pages back to the OS unless fast mode (-f) is on.
        if (++counter%20==0 && enable_malloc_trim) malloc_trim(0);
    }
    ts_parser_delete(parser);
    const auto end=chrono::high_resolution_clock::now();
    const auto ms=chrono::duration_cast<chrono::milliseconds>(end-start).count();
    cout<<"Recccurences map thread number "<<thread_num<<" finished succesfully on "<<ms<<" milliseconds."<<endl;
    return res;
}
// Writes one keyword token: the 4-bit CCC_C_KEYWORD_HEAD tag followed by `index`
// (position in c_keywords) on 6 bits, with an align() call before and after the token.
void generate_c_keyword(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t tag_width=4;
    constexpr uint8_t index_width=6;
    bitstream.align();
    bitstream.write_bits(CCC_C_KEYWORD_HEAD,tag_width);
    bitstream.write_bits(index,index_width);
    bitstream.align();
}
// Writes one reference into the recurrences table: the 4-bit CCC_REC_TABLE_REF_HEAD tag
// followed by `index`. The index width is the number of bits needed to represent
// `total_recs` (the current table size), so it is the same for every reference of a run.
void generate_rec(bit_streamer& bitstream,size_t index,size_t total_recs) {
    size_t index_width=0;
    for (size_t remaining=total_recs;remaining!=0;remaining>>=1) {
        ++index_width;
    }
    bitstream.align();
    bitstream.write_bits(CCC_REC_TABLE_REF_HEAD,4);
    bitstream.write_bits(index,index_width);
    bitstream.align();
}
// Writes one token of the first delimiter table: the 1-bit CCC_DELIMITER_0_HEAD tag followed
// by `index` (position in delimiter0) on 3 bits, with an align() call before and after.
void generate_delimiter0(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t tag_width=1;
    constexpr uint8_t index_width=3;
    bitstream.align();
    bitstream.write_bits(CCC_DELIMITER_0_HEAD,tag_width);
    bitstream.write_bits(index,index_width);
    bitstream.align();
}
// Writes one token of the second delimiter table: the 2-bit CCC_DELIMITER_1_HEAD tag followed
// by `index` (position in delimiter1) on 2 bits, with an align() call before and after.
void generate_delimiter1(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t tag_width=2;
    constexpr uint8_t index_width=2;
    bitstream.align();
    bitstream.write_bits(CCC_DELIMITER_1_HEAD,tag_width);
    bitstream.write_bits(index,index_width);
    bitstream.align();
}
// Writes one token of the miscellaneous table: the 4-bit CCC_MISCELANEOUS_HEAD tag followed
// by `index` (position in miscellaneous) on 6 bits, with an align() call before and after.
void generate_miscellaneous(bit_streamer& bitstream,size_t index) {
    constexpr uint8_t tag_width=4;
    constexpr uint8_t index_width=6;
    bitstream.align();
    bitstream.write_bits(CCC_MISCELANEOUS_HEAD,tag_width);
    bitstream.write_bits(index,index_width);
    bitstream.align();
}
// Writes `text_len` bytes of `text` inline: the 4-bit CCC_STRING_INLINE_HEAD tag, every byte
// on 8 bits, then the 8-bit CCC_STRING_INLINE_END terminator.
// The loop index is a size_t like `text_len` (an int index overflows on very long texts) and
// each byte is widened as unsigned, so no sign-extended value is handed to write_bits().
// NOTE(review): the terminator is a zero byte, so a text that itself contains a zero byte
// cannot be told apart from the end of the string — confirm inputs never contain one.
void generate_string_content(bit_streamer& bitstream,const char *text,size_t text_len) {
    bitstream.align();
    bitstream.write_bits(CCC_STRING_INLINE_HEAD,4);
    for (size_t i=0;i<text_len;++i) {
        bitstream.write_bits(static_cast<unsigned char>(text[i]),8);
    }
    bitstream.write_bits(CCC_STRING_INLINE_END,8);
    bitstream.align();
}
// Prints `text` on standard output, but only when warnings were enabled (show_warning).
void print_warning(string text) {
    if (!show_warning) {
        return;
    }
    cout<<text<<endl;
}
// Terminates the process with exit code -1 when warnings are configured to be fatal
// (fail_on_warning); does nothing otherwise.
void fail_if_warning() {
    if (!fail_on_warning) {
        return;
    }
    exit(-1);
}
void process_file_nodes_loop_call(thread_encoding_input_loop_call& settings) {
bit_streamer& out=settings.thread_local_bit_stream;
for (int i=0;i<settings.node_list.size();i++) {
node n=settings.node_list.at(i);
string type=settings.thread_local_type_map[n.type];
char temp[256];
string_view text{settings.source_code.data()+n.start,n.end-n.start};
if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
generate_string_content(out,text.data(),text.size());
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
} else if (type=="primitive_type" || type=="type_identifier") {
auto it=c_keyword_lookup.find(string(text));
if (it!=c_keyword_lookup.end()) {
size_t index=it->second;
generate_c_keyword(out,index);
} else {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: type node is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
}
} else if (delimiter0_lookup.find(type)!=delimiter0_lookup.end() || delimiter1_lookup.find(type)!=delimiter1_lookup.end() || type=="\"") {
string insert;
if (type=="(" && i+1<settings.node_list.size()) {
if (settings.thread_local_type_map[settings.node_list.at(i+1).type]==")") {
insert="()";
i++;
} else {
insert="(";
}
} else if (type=="[" && i+1<settings.node_list.size()) {
if (settings.thread_local_type_map[settings.node_list.at(i+1).type]=="]") {
insert="[]";
i++;
} else {
insert="[";
}
} else if (type=="{" && i+1<settings.node_list.size()) {
if (settings.thread_local_type_map[settings.node_list.at(i+1).type]=="}") {
insert="{}";
i++;
} else {
insert="{";
}
} else {
insert=type;
}
auto it=delimiter0_lookup.find(insert);
if (it!=delimiter0_lookup.end()) {
size_t index=it->second;
generate_delimiter0(out,index);
} else {
if (insert!="{}" && insert!="\"") {
auto it=delimiter1_lookup.find(insert);
if (it!=delimiter1_lookup.end()) {
size_t index=it->second;
generate_delimiter1(out,index);
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
} else {
if (insert=="{}") {
auto it=delimiter1_lookup.find("{}");
if (it!=delimiter1_lookup.end()) {
size_t index=it->second;
out.align();
out.write_bits(CCC_DELIMITER_1_HEAD,2);
out.write_bits(index,2);
out.write_bits(0b0,1);
out.align();
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
} else if (insert=="\"") {
auto it=delimiter1_lookup.find("{}");
if (it!=delimiter1_lookup.end()) {
size_t index=it->second;
out.align();
out.write_bits(CCC_DELIMITER_1_HEAD,2);
out.write_bits(index,2);
out.write_bits(0b1,1);
out.align();
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
} else {
print_warning("Warning: unknow delimiter, that shouldn't happen: "+insert);
fail_if_warning();
}
}
}
} else if (c_keyword_lookup.find(type)!=c_keyword_lookup.end() || type=="preproc_directive") {
if (type!="preproc_directive") {
auto it=c_keyword_lookup.find(type);
if (it!=c_keyword_lookup.end()) {
size_t index=it->second;
generate_c_keyword(out,index);
} else {
print_warning("Warning: unknow C keyword, that shouldn't happen: "+type+" "+string(text));
fail_if_warning();
}
} else {
auto it=c_keyword_lookup.find(string(text));
if (it!=c_keyword_lookup.end()) {
size_t index=it->second;
generate_c_keyword(out,index);
} else {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: C keyword is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
}
}
} else if (miscelaneous_lookup.find(type)!=miscelaneous_lookup.end()) {
auto it=miscelaneous_lookup.find(type);
if (it!=miscelaneous_lookup.end()) {
size_t index=it->second;
generate_miscellaneous(out,index);
} else {
print_warning("Warning: unknow miscellaneous, that shouldn't happen: "+type);
fail_if_warning();
}
} else if (type=="comment") {
auto it=rec_lookup.find(string(text));
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: comment is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
} else {
auto it=rec_lookup.find(type);
if (it==rec_lookup.end()) {
if (!text.empty()) {
generate_string_content(out,text.data(),text.size());
} else {
print_warning("Warning: unknow node is empty: "+string(text));
fail_if_warning();
}
} else {
size_t index=it->second;
generate_rec(out,index,rec_list.size());
}
}
}
out.align();
return;
}
// Worker of the encoding pass. Pops files from encoding_files_queue until it is empty and,
// for each one: parses it with tree-sitter, flattens its leaves into a node list (PARSING
// mode), then encodes that list into a fresh bit stream tagged with the file's index.
// Returns the bit streams of all the files this worker handled.
// `thread_num` is only used in the final log line.
thread_encoding_result run_thread_encoding(size_t thread_num) {
    const auto start=chrono::high_resolution_clock::now();
    thread_encoding_result res;
    // PARSING mode never touches the recurrences map, but the settings struct needs one.
    map<string,int> useless_rec_map;
    unordered_map<uint16_t,string> thread_local_type_map;
    unordered_map<string,uint16_t,XXH3HasherString> thread_local_type_u16_map;
    vector<node> thread_local_node_list;
    TSParser *parser=ts_parser_new();
    ts_parser_set_language(parser,tree_sitter_c());
    int counter=0;
    while (true) {
        file_entry f;
        {
            lock_guard<mutex> lock(encoding_queue_mutex);
            if (encoding_files_queue.empty()) break;
            f=std::move(encoding_files_queue.front());
            encoding_files_queue.pop();
        }
        thread_iterate_input_loop_call iterate_loop_settings {
            .source_code=f.content,
            .thread_local_node_list=thread_local_node_list,
            .thread_local_type_map=thread_local_type_map,
            .thread_local_type_u16_map=thread_local_type_u16_map,
            .thread_local_next_type_id=0,
            .thread_local_rec_map=useless_rec_map
        };
        // The stream is created before the reference to it is taken; nothing is appended to
        // the vector again until the next file, so the reference stays valid for this one.
        res.encoded_files.emplace_back(f.index);
        thread_encoding_input_loop_call encoding_loop_settings {
            .source_code=f.content,
            .node_list=thread_local_node_list,
            .thread_local_type_map=thread_local_type_map,
            .thread_local_bit_stream=res.encoded_files.back()
        };
        TSTree *tree=ts_parser_parse_string(parser,nullptr,f.content.c_str(),f.content.size());
        if (tree==nullptr) {
            // The parser yields no tree when it has no usable language (or was interrupted);
            // asking for the root node of a null tree would crash.
            cout<<"Error: couldn't parse file: "<<f.name<<endl;
            exit(-1);
        }
        iterate_all_nodes_loop_call(iterate_loop_settings,ts_tree_root_node(tree),iterating_mode::PARSING);
        ts_tree_delete(tree);
        process_file_nodes_loop_call(encoding_loop_settings);
        // Release the per-file working memory before moving on to the next file.
        vector<node>().swap(thread_local_node_list);
        thread_local_type_map.clear();
        thread_local_type_u16_map.clear();
        string().swap(f.content);
        // Periodically hand freed heap pages back to the OS unless fast mode (-f) is on.
        if (++counter%20==0 && enable_malloc_trim) malloc_trim(0);
    }
    ts_parser_delete(parser);
    const auto end=chrono::high_resolution_clock::now();
    const auto ms=chrono::duration_cast<chrono::milliseconds>(end-start).count();
    cout<<"Parsing/encoding thread number "<<thread_num<<" finished succesfully on "<<ms<<" milliseconds."<<endl;
    return res;
}
// Entry point: ccc [-hfwW] [-c0..9] FILES
// 1. builds the fixed token lookups and parses the command line;
// 2. reads every input file and runs the counting pass on worker threads;
// 3. builds the recurrences table (texts seen at least twice and at least 3 bytes long);
// 4. runs the encoding pass on worker threads, largest files first;
// 5. LZMA-compresses the payload, the recurrences table and the files table, keeping the
//    raw form of any part that does not get smaller (recorded in the header flags);
// 6. writes header + recurrences table + files table + payload to "test.ccc".
// Returns 0 on success and -1 on any error.
int main(int argc,char **argv) {
    for (size_t i=0;i<c_keywords.size();i++) {
        c_keyword_lookup[c_keywords[i]]=i;
    }
    for (size_t i=0;i<miscellaneous.size();i++) {
        miscelaneous_lookup[miscellaneous[i]]=i;
    }
    for (size_t i=0;i<delimiter0.size();i++) {
        delimiter0_lookup[delimiter0[i]]=i;
    }
    for (size_t i=0;i<delimiter1.size();i++) {
        delimiter1_lookup[delimiter1[i]]=i;
    }
    if (argc<2) {
        cout<<"Usage: ccc [FILES]"<<endl;
        return -1;
    }
    size_t compression_ratio=6;
    vector<string> files;
    for (int i=1;i<argc;i++) {
        string file=string(argv[i]);
        if (file=="-W") {
            fail_on_warning=true;
            continue;
        }
        if (file=="-w") {
            show_warning=true;
            continue;
        }
        if (file.substr(0,2)=="-c" && file.size()==3) {
            try {
                compression_ratio=stoi(file.substr(2,1));
                continue;
            } catch (const exception& e) {
                cout<<"Error: invalid argument: "<<file<<endl;
                return -1;
            }
        }
        if (file=="-f") {
            enable_malloc_trim=false;
            continue;
        }
        if (file=="-h" || file=="--help") {
            cout<<"C Code Compressor v0.1"<<endl;
            cout<<"Usage: ccc [-hfwW] [FILES]"<<endl;
            cout<<"Options:"<<endl;
            cout<<" -h : show this help message"<<endl;
            cout<<" -f : enable fast mode, reduce the total compression time but does not release unused"<<endl;
            cout<<" unused heap memory back to the OS. Usage of this option can raise memory usage."<<endl;
            cout<<" -w : show warning messages. For example, when a unknown or empty node is detected."<<endl;
            cout<<" -W : crash on warning"<<endl;
            cout<<" -c0..9: set the compression ratio for LZMA multithreaded compression phase. Default is 6."<<endl;
            cout<<" Below level 6, CCC may not compress better than tar.xz."<<endl;
            cout<<" Warning: setting this higher than -c6 will seriously raise memory usage."<<endl;
            cout<<" For exemple, using -c9 more than double memory usage in comparison with"<<endl;
            cout<<" -c6 (which is the default). "<<endl;
            cout<<" Warning: usage of higher options than -c6 combined with -f is heavily not"<<endl;
            cout<<" recommended."<<endl;
            return 0;
        }
        if (!fs::exists(file)) {
            cout<<"Error: file doesn't exist: "<<file<<endl;
            return -1;
        }
        files.push_back(file);
    }
    if (files.empty()) {
        // Only options were given: there is nothing to compress.
        cout<<"Usage: ccc [FILES]"<<endl;
        return -1;
    }
    for (size_t i=0;i<files.size();i++) {
        ifstream file(files[i],ios::binary);
        if (!file) {
            cout<<"Error: couldn't open provided file."<<endl;
            return -1;
        }
        string code((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
        // Read the size before the buffer is moved: initializers run left to right, so asking
        // the moved-from string for its size recorded 0 for every file.
        const size_t code_size=code.size();
        file_entry f{files[i],std::move(code),code_size,i};
        rec_map_files_queue.push(std::move(f));
    }
    // hardware_concurrency() may report 0 or 1; always keep at least one worker.
    const size_t nb_threads=max<size_t>(1,thread::hardware_concurrency()/2);
    vector<future<thread_rec_map_result>> rec_map_futures;
    for (size_t i=0;i<nb_threads;++i) {
        rec_map_futures.push_back(async(launch::async,run_thread_rec_map,i+1));
    }
    map<string,int> global_rec_map;
    for (auto& fut:rec_map_futures) {
        const thread_rec_map_result partial=fut.get();
        for (auto const& [str,count]:partial.thread_local_rec_map) {
            global_rec_map[str]+=count;
        }
    }
    for (auto const& [str,count]:global_rec_map) {
        if (count>=2 && str.size()>=3) {
            rec_lookup[str]=rec_list.size();
            rec_list.push_back(str);
        }
    }
    global_rec_map.clear();
    // The counting workers refilled the encoding queue in completion order; re-order it so the
    // largest files are encoded first.
    vector<file_entry> encoding_files_vec;
    while (!encoding_files_queue.empty()) {
        encoding_files_vec.push_back(std::move(encoding_files_queue.front()));
        encoding_files_queue.pop();
    }
    sort(encoding_files_vec.begin(),encoding_files_vec.end(),[](const file_entry& a,const file_entry& b) {
        return a.size>b.size;
    });
    for (auto& f:encoding_files_vec) {
        encoding_files_queue.push(std::move(f));
    }
    vector<future<thread_encoding_result>> encoding_futures;
    for (size_t i=0;i<nb_threads;++i) {
        encoding_futures.push_back(async(launch::async,run_thread_encoding,i+1));
    }
    vector<bit_streamer> globals_bit_stream;
    for (auto& fut:encoding_futures) {
        thread_encoding_result res=fut.get();
        globals_bit_stream.insert(globals_bit_stream.end(),make_move_iterator(res.encoded_files.begin()),make_move_iterator(res.encoded_files.end()));
    }
    // Back to command-line order, so that stream i belongs to files[i].
    sort(globals_bit_stream.begin(),globals_bit_stream.end(),[](const bit_streamer& a,const bit_streamer& b) {
        return a.index<b.index;
    });
    if (globals_bit_stream.size()!=files.size()) {
        cout<<"Error: encoded files count doesn't match input files count."<<endl;
        return -1;
    }
    vector<unsigned char> final_payloads;
    vector<size_t> global_payloads_start;
    size_t total_size2=0;
    for (auto& bstr:globals_bit_stream) total_size2+=bstr.get_size();
    final_payloads.reserve(total_size2);
    size_t current_offset=0;
    for (auto& bstr:globals_bit_stream) {
        global_payloads_start.push_back(current_offset);
        auto encoded_file=std::move(bstr.extract_buffer());
        final_payloads.insert(final_payloads.end(),encoded_file.begin(),encoded_file.end());
        current_offset+=encoded_file.size();
    }
    // --- payload: multithreaded LZMA ---
    vector<unsigned char> payload_compressed;
    payload_compressed.resize(final_payloads.size()+final_payloads.size()/3+128);
    lzma_mt mt_options={};
    mt_options.flags=0;
    // The encoder rejects a thread count of 0, and the count is used as a divisor just below.
    mt_options.threads=max<uint32_t>(1,thread::hardware_concurrency()/4);
    mt_options.block_size=max<size_t>((size_t)8*1024*1024,final_payloads.size()/mt_options.threads);
    mt_options.timeout=0;
    mt_options.check=LZMA_CHECK_CRC64;
    lzma_options_lzma opt_lzma;
    if (lzma_lzma_preset(&opt_lzma,compression_ratio)) {
        cout<<"Error: couldn't initialize LZMA compressor for files archive."<<endl;
        return -1;
    }
    lzma_filter filters[2];
    filters[0].id=LZMA_FILTER_LZMA2;
    filters[0].options=&opt_lzma;
    filters[1].id=LZMA_VLI_UNKNOWN;
    mt_options.filters=filters;
    lzma_stream strm=LZMA_STREAM_INIT;
    auto ret=lzma_stream_encoder_mt(&strm,&mt_options);
    if (ret!=LZMA_OK) {
        cout<<"Error: couldn't initialize MT compressor for files archives."<<endl;
        return -1;
    }
    strm.next_in=final_payloads.data();
    strm.avail_in=final_payloads.size();
    strm.next_out=payload_compressed.data();
    strm.avail_out=payload_compressed.size();
    ret=lzma_code(&strm,LZMA_FINISH);
    size_t compressed_size=payload_compressed.size()-strm.avail_out;
    // Release the encoder before `strm` is re-initialised for the next table.
    lzma_end(&strm);
    if (ret!=LZMA_STREAM_END) {
        cout<<"Error: couldn't compress files archive."<<endl;
        return -1;
    }
    cout<<"Compressed payloads."<<endl;
    payload_compressed.resize(compressed_size);
    size_t original_size=final_payloads.size();
    uint8_t flags=0;
    size_t payload_total_size=original_size;
    if (compressed_size<original_size) {
        flags|=0b00000001;
        payload_total_size=compressed_size;
    }
    // --- recurrences table: NUL-terminated strings, in rec_list order ---
    vector<unsigned char> rec_table;
    for (const string& rec:rec_list) {
        rec_table.insert(rec_table.end(),rec.begin(),rec.end());
        rec_table.push_back('\0');
    }
    vector<unsigned char> rec_table_compressed;
    rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
    strm=LZMA_STREAM_INIT;
    if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
        cout<<"Error: couldn't initialize LZMA compressor for reccurences table."<<endl;
        return -1;
    }
    strm.next_in=rec_table.data();
    strm.avail_in=rec_table.size();
    strm.next_out=rec_table_compressed.data();
    strm.avail_out=rec_table_compressed.size();
    ret=lzma_code(&strm,LZMA_FINISH);
    if (ret!=LZMA_STREAM_END) {
        cout<<"Error: couldn't compress reccurences table."<<endl;
        return -1;
    }
    cout<<"Compressed reccurences table."<<endl;
    compressed_size=rec_table_compressed.size()-strm.avail_out;
    rec_table_compressed.resize(compressed_size);
    original_size=rec_table.size();
    lzma_end(&strm);
    size_t rec_table_total_size=original_size;
    if (compressed_size<original_size) {
        flags|=0b00000010;
        rec_table_total_size=compressed_size;
    }
    // --- files table: per file, NUL-terminated name, then payload offset and payload size as
    // raw size_t bytes ---
    vector<unsigned char> files_table;
    for (size_t i=0;i<files.size();i++) {
        files_table.insert(files_table.end(),files[i].begin(),files[i].end());
        files_table.push_back('\0');
        const size_t file_start=global_payloads_start[i];
        const size_t file_end=(i+1==files.size()) ? final_payloads.size() : global_payloads_start[i+1];
        const size_t file_size=file_end-file_start;
        const auto *start_bytes=reinterpret_cast<const uint8_t*>(&file_start);
        files_table.insert(files_table.end(),start_bytes,start_bytes+sizeof(size_t));
        const auto *size_bytes=reinterpret_cast<const uint8_t*>(&file_size);
        files_table.insert(files_table.end(),size_bytes,size_bytes+sizeof(size_t));
    }
    vector<unsigned char> files_table_compressed;
    files_table_compressed.resize(files_table.size()+files_table.size()/3+128);
    strm=LZMA_STREAM_INIT;
    if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
        cout<<"Error: couldn't initialize LZMA compressor for files table."<<endl;
        return -1;
    }
    strm.next_in=files_table.data();
    strm.avail_in=files_table.size();
    strm.next_out=files_table_compressed.data();
    strm.avail_out=files_table_compressed.size();
    ret=lzma_code(&strm,LZMA_FINISH);
    if (ret!=LZMA_STREAM_END) {
        cout<<"Error: couldn't compress files table."<<endl;
        return -1;
    }
    cout<<"Compressed files table."<<endl;
    compressed_size=files_table_compressed.size()-strm.avail_out;
    files_table_compressed.resize(compressed_size);
    original_size=files_table.size();
    lzma_end(&strm);
    // The header has no field for the files table size; only its flag is recorded.
    if (compressed_size<original_size) {
        flags|=0b00000100;
    }
    // --- assemble and write the archive ---
    header head{};
    head.sig[0]='C';
    head.sig[1]='C';
    head.sig[2]='C';
    head.flags=flags;
    head.size_payload=payload_total_size;
    head.size_rec_table=rec_table_total_size;
    head.entry_count=files.size();
    vector<unsigned char> out;
    const auto *head_bytes=reinterpret_cast<const uint8_t*>(&head);
    out.insert(out.end(),head_bytes,head_bytes+sizeof(header));
    const vector<unsigned char>& rec_bytes=(flags & 0b00000010) ? rec_table_compressed : rec_table;
    out.insert(out.end(),rec_bytes.begin(),rec_bytes.end());
    const vector<unsigned char>& files_bytes=(flags & 0b00000100) ? files_table_compressed : files_table;
    out.insert(out.end(),files_bytes.begin(),files_bytes.end());
    const vector<unsigned char>& payload_bytes=(flags & 0b00000001) ? payload_compressed : final_payloads;
    out.insert(out.end(),payload_bytes.begin(),payload_bytes.end());
    ofstream fileout("test.ccc",ios::binary);
    if (!fileout) {
        cout<<"Error: couldn't open output file."<<endl;
        return -1;
    }
    fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
    fileout.close();
    cout<<"Finished !"<<endl;
    return 0;
}

BIN
ccc_old Executable file

Binary file not shown.

677
ccc_old.cpp Normal file
View File

@@ -0,0 +1,677 @@
#include <cstdint>
#include <cstring>
#include <iostream>
#include <filesystem>
#include <fstream>
#include <stdint.h>
#include <string>
#include <map>
#include <unordered_map>
#include <vector>
#include <iterator>
#include <algorithm>
#include <tree_sitter/api.h>
#include <tree_sitter/tree-sitter-c.h>
#include <lzma.h>
using namespace std;
namespace fs=filesystem;
// Tag prefixes, one bit per element, that the generate_* helpers put in front of every token
// to tell its kind. No tag is a prefix of another one.
const vector<bool> CCC_DELIMITER_0_HEAD={0};
const vector<bool> CCC_DELIMITER_1_HEAD={1,0};
const vector<bool> CCC_C_KEYWORD_HEAD={1,1,0,0};
const vector<bool> CCC_MISCELANEOUS_HEAD={1,1,0,1};
const vector<bool> CCC_STRING_INLINE_HEAD={1,1,1,0};
const vector<bool> CCC_REC_TABLE_REF_HEAD={1,1,1,1};
// Eight zero bits appended by generate_string_content() after the bytes of an inline string.
const vector<bool> CCC_STRING_INLINE_END={0,0,0,0,0,0,0,0};
// Appends every element of `tail` at the end of `vec`.
// `tail` is first copied into a local, so it is evaluated exactly once and may be a temporary
// (a function call result or a braced list). The do/while(0) wrapper makes the macro usable
// as a single statement. The name is spelled "COMPOMENT" (sic) and used as such in this file.
#define CCC_ADD_COMPOMENT(vec,tail) \
do { \
auto tmp=tail; \
vec.insert(vec.end(),tmp.begin(),tmp.end()); \
} while (0)
// First delimiter table. process_file_nodes() uses the position of an entry in this list as
// the index given to generate_delimiter0(), which writes it on 3 bits: the order of the
// entries is part of the encoded format.
const vector<string> delimiter0={
"{",
"}",
"(",
")",
"[",
"]",
",",
"."
};
// Second delimiter table. process_file_nodes() uses the position of an entry in this list as
// the index given to generate_delimiter1(), which writes it on 2 bits: the order of the
// entries is part of the encoded format.
// The "{}" slot is also reused, followed by one extra bit, to tell an empty "{}" pair (0)
// from a lone '"' token (1).
const vector<string> delimiter1={
"{}",
"()",
"[]",
";"
};
// Operators and a few very common identifiers that get a fixed code.
// process_file_nodes() uses the position of an entry in this list as the index given to
// generate_miscellaneous(), which writes it on 6 bits: the order is part of the encoded
// format and the list cannot grow beyond 64 entries without widening the index.
const vector<string> miscellaneous={
"!",
"%",
"'",
"*",
"+",
"-",
"/",
":",
"<",
">",
"=",
"?",
"^",
"|",
"&",
"~",
"+=",
"-=",
"*=",
"/=",
"%=",
"&=",
"|=",
"^=",
"<<=",
">>=",
"++",
"--",
"<<",
">>",
"==",
"!=",
"<=",
">=",
"->",
"...",
"||",
"&&",
"NULL",
"size_t",
"uint8_t",
"uint16_t",
"uint32_t",
"uint64_t",
"int8_t",
"int16_t",
"int32_t",
"int64_t"
};
// Preprocessor directives, C keywords and a few compiler-specific tokens that get a fixed code.
// process_file_nodes() uses the position of an entry in this list as the index given to
// generate_c_keyword(), which writes it on 6 bits: the order is part of the encoded format
// and the list cannot grow beyond 64 entries without widening the index.
// get_all_nodes() also consults this list to keep keyword-like primitive types out of the
// recurrences map.
const vector<string> c_keywords={
"#if",
"#ifdef",
"#ifndef",
"#else",
"#elif",
"#elifdef",
"#elifndef",
"#endif",
"#define",
"#undef",
"#include",
"#error",
"#warning",
"#pragma",
"#line",
"alignas",
"alignof",
"auto",
"bool",
"break",
"case",
"char",
"const",
"constexpr",
"continue",
"default",
"do",
"double",
"else",
"enum",
"extern",
"false",
"float",
"for",
"goto",
"if",
"inline",
"int",
"long",
"nullptr",
"register",
"restrict",
"return",
"short",
"signed",
"sizeof",
"static",
"static_assert",
"struct",
"switch",
"thread_local",
"true",
"typedef",
"typeof",
"typeof_unequal",
"union",
"unsigned",
"void",
"volatile",
"while",
"__asm__",
"__attribute__",
"defined",
};
// A name paired with an integer score.
// NOTE(review): no use of this struct is visible in this part of the file — confirm it is
// still needed.
struct symbol {
string name;
int score;
};
// Archive header, packed to 1-byte alignment so the struct contains no padding.
// NOTE(review): the code filling and writing it is outside this part of the file; the field
// comments below are inferred from the names — confirm against main().
// NOTE(review): the size_t fields make the layout depend on the platform's size_t width.
#pragma pack(push,1)
struct header {
uint8_t sig[3]; // presumably the file signature
uint8_t flags; // presumably per-section flags
size_t size_rec_table; // presumably the stored size of the recurrences table
size_t entry_count; // presumably the number of archived files
size_t size_payload; // presumably the stored size of the payload
};
#pragma pack(pop)
// File name -> leaves of its syntax tree, in traversal order; filled by get_all_nodes().
map<string,vector<TSNode>> all_tokens;
// Token text -> occurrence count.
// NOTE(review): get_all_nodes() counts into the map it is given as a parameter, which has the
// same name as this global — presumably this one is what callers pass; confirm.
map<string,int> rec_map;
// Recurrences table: token texts stored once and referenced by index.
vector<string> rec_list;
// NOTE(review): the five lookups below are not used in the visible part of the file (the
// encoder searches the vectors directly) — confirm whether they are still needed.
unordered_map<string,size_t> rec_lookup;
unordered_map<string,size_t> c_keyword_lookup;
unordered_map<string,size_t> miscelaneous_lookup;
unordered_map<string,size_t> delimiter0_lookup;
unordered_map<string,size_t> delimiter1_lookup;
// When true, print_debug() prints its argument.
bool debug=false;
// Walks the syntax tree rooted at `node` depth-first. Every leaf (node without children) is
// appended to all_tokens[file], and its text is counted in `rec_map` when it is an
// identifier-like token. Comments are forced to a count of 2, and primitive types are only
// counted when they are not listed in c_keywords.
void get_all_nodes(TSNode node,const string &source_code,map<string,int> &rec_map,const string& file) {
    const uint32_t children=ts_node_child_count(node);
    if (children!=0) {
        for (uint32_t idx=0;idx<children;++idx) {
            get_all_nodes(ts_node_child(node,idx),source_code,rec_map,file);
        }
        return;
    }
    all_tokens[file].push_back(node);
    const uint32_t first_byte=ts_node_start_byte(node);
    const string text=source_code.substr(first_byte,ts_node_end_byte(node)-first_byte);
    const string kind=ts_node_type(node);
    if (kind=="comment") {
        rec_map[text]=2;
    } else if (kind=="primitive_type") {
        if (find(c_keywords.begin(),c_keywords.end(),text)==c_keywords.end()) {
            rec_map[text]++;
        }
    } else if (kind=="string_content" || kind=="system_lib_string" || kind=="identifier" || kind=="number_literal" || kind=="type_identifier" || kind=="field_identifier" || kind=="escape_sequence" || kind=="statement_identifier") {
        rec_map[text]++;
    }
}
// Returns the 8 bits of `c`, most significant bit first.
vector<bool> byte_to_bits(unsigned char c) {
    vector<bool> bits(8);
    for (int pos=0;pos<8;++pos) {
        bits[pos]=((c>>(7-pos))&0x01)!=0;
    }
    return bits;
}
// Builds one keyword token: the CCC_C_KEYWORD_HEAD tag followed by `index` on 6 bits, most
// significant bit first.
vector<bool> generate_c_keyword(size_t index) {
    vector<bool> token(CCC_C_KEYWORD_HEAD.begin(),CCC_C_KEYWORD_HEAD.end());
    for (int shift=5;shift>=0;--shift) {
        token.push_back(((index>>shift)&0x01)!=0);
    }
    return token;
}
// Builds one reference into the recurrences table: the CCC_REC_TABLE_REF_HEAD tag followed by
// `index`, most significant bit first.
// The index is written from bit position `top_bit` (the number of bits needed to represent
// `total_recs`) down to position 0 inclusive, i.e. on top_bit+1 bits.
// NOTE(review): that is one bit more than the computed width — presumably the decoder expects
// it, so it is kept as is; confirm before changing.
vector<bool> generate_rec(size_t index,size_t total_recs) {
    int top_bit=0;
    for (size_t remaining=total_recs;remaining!=0;remaining>>=1) {
        ++top_bit;
    }
    vector<bool> token(CCC_REC_TABLE_REF_HEAD.begin(),CCC_REC_TABLE_REF_HEAD.end());
    for (int shift=top_bit;shift>=0;--shift) {
        token.push_back(((index>>shift)&0x01)!=0);
    }
    return token;
}
// Builds one token of the first delimiter table: the CCC_DELIMITER_0_HEAD tag followed by
// `index` on 3 bits, most significant bit first.
vector<bool> generate_delimiter0(size_t index) {
    vector<bool> token(CCC_DELIMITER_0_HEAD.begin(),CCC_DELIMITER_0_HEAD.end());
    for (int shift=2;shift>=0;--shift) {
        token.push_back(((index>>shift)&0x01)!=0);
    }
    return token;
}
// Builds one token of the second delimiter table: the CCC_DELIMITER_1_HEAD tag followed by
// `index` on 2 bits, most significant bit first.
vector<bool> generate_delimiter1(size_t index) {
    vector<bool> token(CCC_DELIMITER_1_HEAD.begin(),CCC_DELIMITER_1_HEAD.end());
    for (int shift=1;shift>=0;--shift) {
        token.push_back(((index>>shift)&0x01)!=0);
    }
    return token;
}
// Builds one token of the miscellaneous table: the CCC_MISCELANEOUS_HEAD tag followed by
// `index` on 6 bits, most significant bit first.
vector<bool> generate_miscellaneous(size_t index) {
    vector<bool> token(CCC_MISCELANEOUS_HEAD.begin(),CCC_MISCELANEOUS_HEAD.end());
    for (int shift=5;shift>=0;--shift) {
        token.push_back(((index>>shift)&0x01)!=0);
    }
    return token;
}
// Builds one inline string token: the CCC_STRING_INLINE_HEAD tag, the 8 bits of every byte of
// `str` in order, then the CCC_STRING_INLINE_END terminator.
vector<bool> generate_string_content(string str) {
    vector<bool> token(CCC_STRING_INLINE_HEAD.begin(),CCC_STRING_INLINE_HEAD.end());
    for (const char ch:str) {
        const vector<bool> byte_bits=byte_to_bits(ch);
        token.insert(token.end(),byte_bits.begin(),byte_bits.end());
    }
    token.insert(token.end(),CCC_STRING_INLINE_END.begin(),CCC_STRING_INLINE_END.end());
    return token;
}
// Prints `text` on standard output, but only when the global `debug` flag is set.
void print_debug(string text) {
    if (!debug) {
        return;
    }
    cout<<text<<endl;
}
vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vector<string> &rec_list) {
vector<bool> out;
for (int i=0;i<nodes->size();i++) {
string type=string(ts_node_type(nodes->at(i)));
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) {
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string ("+type+"): "+text);
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string ("+type+"): "+text);
}
} else if (type=="primitive_type" || type=="type_identifier") {
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
auto it=find(c_keywords.begin(),c_keywords.end(),text);
if (it!=c_keywords.end()) {
size_t index=distance(c_keywords.begin(),it);
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
print_debug("type found in c keyword: "+text);
} else {
auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) {
if (!text.empty()) {
CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string for type ("+type+"): "+text);
} else {
cout<<"Warning: provided primitive is empty: "<<text<<endl;
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string for type ("+type+"): "+text);
}
}
} else if (find(delimiter0.begin(),delimiter0.end(),type)!=delimiter0.end() || find(delimiter1.begin(),delimiter1.end(),type)!=delimiter1.end() || type=="\"") {
string insert;
if (type=="(" && i+1<nodes->size()) {
if (string(ts_node_type(nodes->at(i+1)))==")") {
insert="()";
i++;
} else {
insert="(";
}
} else if (type=="[" && i+1<nodes->size()) {
if (string(ts_node_type(nodes->at(i+1)))=="]") {
insert="[]";
i++;
} else {
insert="[";
}
} else if (type=="{" && i+1<nodes->size()) {
if (string(ts_node_type(nodes->at(i+1)))=="}") {
insert="{}";
i++;
} else {
insert="{";
}
} else {
insert=type;
}
auto it=find(delimiter0.begin(),delimiter0.end(),insert);
if (it!=delimiter0.end()) {
size_t index=distance(delimiter0.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter0(index));
print_debug("delimiter 0: "+insert);
} else {
if (insert!="{}" && insert!="\"") {
auto it=find(delimiter1.begin(),delimiter1.end(),insert);
if (it!=delimiter1.end()) {
size_t index=distance(delimiter1.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
print_debug("delimiter 1: "+insert);
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
// exit(-1);
}
} else {
if (insert=="{}") {
auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
if (it!=delimiter1.end()) {
size_t index=distance(delimiter1.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
CCC_ADD_COMPOMENT(out,{0});
print_debug("delimiter 1: "+insert);
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
// exit(-1);
}
} else if (insert=="\"") {
auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
if (it!=delimiter1.end()) {
size_t index=distance(delimiter1.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
CCC_ADD_COMPOMENT(out,{1});
print_debug("delimiter 1: "+insert);
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
exit(-1);
}
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
// exit(-1);
}
}
}
} else if (find(c_keywords.begin(),c_keywords.end(),type)!=c_keywords.end() || type=="preproc_directive") {
if (type!="preproc_directive") {
auto it=find(c_keywords.begin(),c_keywords.end(),type);
if (it!=c_keywords.end()) {
size_t index=distance(c_keywords.begin(),it);
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
print_debug("c keyword: "+type);
} else {
cout<<"Error: unknow C keyword, that shouldn't happen: "<<type<<" "<<text<<endl;;
// exit(-1);
}
} else {
auto it=find(c_keywords.begin(),c_keywords.end(),text);
if (it!=c_keywords.end()) {
size_t index=distance(c_keywords.begin(),it);
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
print_debug("c keyword: "+type);
} else {
auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) {
if (!text.empty()) {
CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string for c keyword ("+type+"): "+text);
} else {
cout<<"Warning: C keyword is empty: "<<text<<endl;
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string for c keyword ("+type+"): "+text);
}
}
}
} else if (find(miscellaneous.begin(),miscellaneous.end(),type)!=miscellaneous.end()) {
auto it=find(miscellaneous.begin(),miscellaneous.end(),type);
if (it!=miscellaneous.end()) {
size_t index=distance(miscellaneous.begin(),it);
CCC_ADD_COMPOMENT(out,generate_miscellaneous(index));
print_debug("miscellaneous: "+type);
} else {
cout<<"Error: unknow miscellaneous, that shouldn't happen: "<<type<<endl;;
// exit(-1);
}
} else if (type=="comment") {
auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) {
if (it==rec_list.end()) {
if (!text.empty()) {
CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string for comment("+type+"): "+text);
} else {
cout<<"Warning: unknow node is empty: "<<text<<endl;
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string for comment ("+type+"): "+text);
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for comment");
}
} else {
auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) {
if (!text.empty()) {
CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string for unknow node ("+type+"): "+text);
} else {
cout<<"Warning: unknow node is empty: "<<text<<endl;
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string for unknow node ("+type+"): "+text);
}
}
}
vector<unsigned char> payload_bytes;
unsigned char current=0;
size_t bit_index=0;
for (bool b:out) {
current|=(b<<(7-bit_index));
bit_index++;
if (bit_index==8) {
payload_bytes.push_back(current);
current=0;
bit_index=0;
}
}
if (bit_index!=0) {
payload_bytes.push_back(current);
}
return payload_bytes;
}
// Parses every input file with the tree-sitter C grammar and fills the recurrence list.
//
// For each file, get_all_nodes() is called with the tree root, the file content,
// the global rec_map and the file name. Afterwards every rec_map key that is at
// least 3 bytes long and has a count of at least 2 is appended to the global rec_list.
//
// files_content: source text of each file.
// files_names:   name of each file, index-aligned with files_content.
void construct_rec_table(vector<string> &files_content,vector<string> files_names) {
	// A single parser is reused for every file and released once parsing is done,
	// instead of allocating (and leaking) one parser per file.
	TSParser *parser=ts_parser_new();
	ts_parser_set_language(parser,tree_sitter_c());
	for (size_t i=0;i<files_content.size();i++) {
		TSTree *tree=ts_parser_parse_string(parser,nullptr,files_content[i].c_str(),files_content[i].size());
		TSNode root=ts_tree_root_node(tree);
		get_all_nodes(root,files_content[i],rec_map,files_names[i]);
		// NOTE(review): the tree is deliberately not deleted here. main() later reads
		// nodes out of all_tokens, so get_all_nodes() presumably stores TSNode handles
		// that point into this tree - confirm before adding ts_tree_delete().
	}
	ts_parser_delete(parser);
	for (const auto &entry:rec_map) {
		if (entry.second>=2 && entry.first.size()>=3) {
			rec_list.push_back(entry.first);
		}
	}
}
int main(int argc,char **argv) {
if (argc<2) {
cout<<"Usage: ccc [FILES]"<<endl;
return -1;
}
vector<string> files;
for (int i=1;i<argc;i++) {
string file=string(argv[i]);
if (file=="-v") {
debug=true;
continue;
}
if (!fs::exists(file)) {
cout<<"Error: file doesn't exist: "<<file<<endl;
return -1;
}
files.push_back(file);
}
vector<string> files_content;
for (auto f:files) {
ifstream file(f,ios::binary);
if (!file) {
cout<<"Error: couldn't open provided file."<<endl;
return -1;
}
string code((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
files_content.push_back(code);
}
construct_rec_table(files_content,files);
vector<unsigned char> files_archive;
vector<size_t> payloads_size;
vector<size_t> payloads_start;
for (int i=0;i<files_content.size();i++) {
auto payload_bytes=process_file_nodes(&(all_tokens.at(files[i])),files_content[i],rec_list);
payloads_size.push_back(payload_bytes.size());
payloads_start.push_back(files_archive.size());
CCC_ADD_COMPOMENT(files_archive,payload_bytes);
cout<<i+1<<" file(s) done on "<<files.size()<<": "<<files[i]<<endl;
}
vector<unsigned char> payload_compressed;
payload_compressed.resize(files_archive.size()+files_archive.size()/3+128);
lzma_stream strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for file archive."<<endl;
return -1;
}
strm.next_in=files_archive.data();
strm.avail_in=files_archive.size();
strm.next_out=payload_compressed.data();
strm.avail_out=payload_compressed.size();
auto ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress file archive."<<endl;
return -1;
}
size_t payload_total_size;
size_t compressed_size=payload_compressed.size()-strm.avail_out;
payload_compressed.resize(compressed_size);
size_t original_size=files_archive.size();
lzma_end(&strm);
uint8_t flags=0;
if (compressed_size>=original_size) {
flags&= ~(0b00000001);
payload_total_size=original_size;
} else {
flags|=0b00000001;
payload_total_size=compressed_size;
}
vector<unsigned char> rec_table;
for (int i=0;i<rec_list.size();i++) {
for (auto c:rec_list[i]) {
rec_table.push_back(c);
}
rec_table.push_back('\0');
}
vector<unsigned char> rec_table_compressed;
rec_table_compressed.resize(rec_table.size()+rec_table.size()/3+128);
strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for reccurences table."<<endl;
return -1;
}
strm.next_in=rec_table.data();
strm.avail_in=rec_table.size();
strm.next_out=rec_table_compressed.data();
strm.avail_out=rec_table_compressed.size();
ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress reccurences table."<<endl;
return -1;
}
size_t rec_table_total_size;
compressed_size=rec_table_compressed.size()-strm.avail_out;
rec_table_compressed.resize(compressed_size);
original_size=rec_table.size();
lzma_end(&strm);
if (compressed_size>=original_size) {
flags&= ~(0b00000010);
rec_table_total_size=original_size;
} else {
flags|=0b00000010;
rec_table_total_size=compressed_size;
}
vector<unsigned char> files_table;
for (int i=0;i<files.size();i++) {
for (auto c:files[i]) {
files_table.push_back(c);
}
files_table.push_back('\0');
auto file_start=payloads_start[i];
for (int i=0;i<sizeof(size_t);++i) {
files_table.push_back(((uint8_t*)&file_start)[i]);
}
auto file_size=payloads_size[i];
for (int i=0;i<sizeof(size_t);++i) {
files_table.push_back(((uint8_t*)&file_size)[i]);
}
}
vector<unsigned char> files_table_compressed;
files_table_compressed.resize(files_table.size()+files_table.size()/3+128);
strm=LZMA_STREAM_INIT;
if (lzma_easy_encoder(&strm,9,LZMA_CHECK_CRC64)!=LZMA_OK) {
cout<<"Error: couldn't initialize LZMA compressor for files table."<<endl;
return -1;
}
strm.next_in=files_table.data();
strm.avail_in=files_table.size();
strm.next_out=files_table_compressed.data();
strm.avail_out=files_table_compressed.size();
ret=lzma_code(&strm,LZMA_FINISH);
if (ret!=LZMA_STREAM_END) {
cout<<"Error: couldn't compress files table."<<endl;
return -1;
}
size_t files_table_total_size;
compressed_size=files_table_compressed.size()-strm.avail_out;
files_table_compressed.resize(compressed_size);
original_size=files_table.size();
lzma_end(&strm);
if (compressed_size>=original_size) {
flags&= ~(0b00000100);
files_table_total_size=original_size;
} else {
flags|=0b00000100;
files_table_total_size=compressed_size;
}
header head;
head.sig[0]='C';
head.sig[1]='C';
head.sig[2]='C';
head.flags=flags;
head.size_payload=payload_total_size;
head.size_rec_table=rec_table_total_size;
head.entry_count=files.size();
vector<unsigned char> out;
for (int i=0;i<sizeof(header);i++) {
out.push_back(((uint8_t*)&head)[i]);
}
if (flags & 0b00000010) {
CCC_ADD_COMPOMENT(out,rec_table_compressed);
} else {
CCC_ADD_COMPOMENT(out,rec_table);
}
if (flags & 0b00000100) {
CCC_ADD_COMPOMENT(out,files_table_compressed);
} else {
CCC_ADD_COMPOMENT(out,files_table);
}
if (flags & 0b00000001) {
CCC_ADD_COMPOMENT(out,payload_compressed);
} else {
CCC_ADD_COMPOMENT(out,files_archive);
}
cout<<"Payload final size: "<<payload_total_size<<endl;
ofstream fileout("test_old.ccc",ios::binary);
if (!fileout) {
cout<<"Error: couldn't open output file."<<endl;
return -1;
}
fileout.write(reinterpret_cast<const char*>(out.data()),out.size());
fileout.close();
return 0;
}

13
hello.c Normal file
View File

@@ -0,0 +1,13 @@
#include <stdio.h>
#include <stdint.h>
// Byte-sized alias used by main(). `typedef` is itself a storage-class specifier,
// so it cannot be combined with `static`.
typedef unsigned char HEY;
// hello
// Prints "hello" when res differs from 9, then returns 6 as the exit status.
int main() {
	char hello[]="hello";
	HEY res=8;
	if (res!=9) {
		// Pass the text as an argument so it is never interpreted as a format string.
		printf("%s",hello);
	}
	unsigned char r=6;
	return r;
}

BIN
linux_sources.tar.xz Normal file

Binary file not shown.

409
log.c Normal file
View File

@@ -0,0 +1,409 @@
#include "../include/log.h"
#include "../include/page.h"
// Current log threshold: sh_log_send_payload() drops payloads from non-test sources
// whose output_type is below this value. Written by sh_log_load_log_level() and
// read back by sh_log_get_log_level().
sh_uint8 kernel_log_level=0;
// Reads one byte from the x86 I/O port `port` with the `inb` instruction and returns it.
// Constraints: "=a" receives the result through the A register; "Nd" lets the port be
// either an 8-bit immediate or the D register.
static inline sh_uint8 inb(sh_uint16 port) {
sh_uint8 val;
__asm__ volatile ("inb %1, %0":"=a"(val):"Nd"(port));
return val;
}
// Writes the byte `val` to the x86 I/O port `port` with the `outb` instruction.
// Constraints: "a" supplies the value through the A register; "Nd" lets the port be
// either an 8-bit immediate or the D register.
static inline void outb(sh_uint16 port,sh_uint8 val) {
__asm__ volatile ("outb %0, %1"::"a"(val),"Nd"(port));
}
// Sends one byte on the COM1 serial port.
// Spins until bit 0x20 of the register at SH_LOG_SERIAL_PORT_COM1+5 is set
// (presumably the UART line-status "transmit holding register empty" bit - confirm),
// then writes `b` to SH_LOG_SERIAL_PORT_COM1. Always returns SH_STATUS_SUCCESS.
SH_STATUS sh_log_send_byte(sh_uint8 b) {
	for (;;) {
		sh_uint8 line_status=inb(SH_LOG_SERIAL_PORT_COM1+5);
		if (line_status & 0x20) {
			break;
		}
	}
	outb(SH_LOG_SERIAL_PORT_COM1,b);
	return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_send_string(const char* str) {
while (*str) {
sh_log_send_byte(*str++);
}
return SH_STATUS_SUCCESS;
}
// Sends `n` as an unsigned decimal number (no padding, "0" for zero).
// Digits are produced least-significant first into a 20-character scratch buffer
// (enough for the largest 64-bit value) and then emitted in reverse.
SH_STATUS sh_log_send_uintn(sh_uint64 n) {
	char digits[20];
	sh_uint32 count=0;
	// do/while so that zero still yields a single '0' digit.
	do {
		digits[count++]=(char)('0'+(n%10));
		n/=10;
	} while (n!=0);
	while (count>0) {
		count--;
		sh_log_send_byte(digits[count]);
	}
	return SH_STATUS_SUCCESS;
}
// Sends `n` as upper-case hexadecimal without prefix or padding ("0" for zero).
// Nibbles are collected least-significant first into a 16-character scratch buffer
// (one per nibble of a 64-bit value) and then emitted in reverse.
SH_STATUS sh_log_send_uintn_hex(sh_uint64 n) {
	const char hex_digits[]="0123456789ABCDEF";
	char nibbles[16];
	sh_uint32 count=0;
	// do/while so that zero still yields a single '0' digit.
	do {
		nibbles[count++]=hex_digits[n & 0xF];
		n>>=4;
	} while (n!=0);
	while (count>0) {
		count--;
		sh_log_send_byte(nibbles[count]);
	}
	return SH_STATUS_SUCCESS;
}
// Sends `value` in fixed notation: optional '-', integer part, '.', then exactly six
// fractional digits (truncated, not rounded).
// NaN is sent as "nan" and magnitudes of 2^64 or more as "overflow" (after the sign),
// because converting such values to sh_uint64 is undefined behaviour.
// Returns the first non-success status reported by a send helper, else SH_STATUS_SUCCESS.
SH_STATUS sh_log_send_double(double value) {
	// NaN is the only value that compares unequal to itself.
	if (value!=value) {
		return sh_log_send_string("nan");
	}
	if (value<0) {
		sh_log_send_byte('-');
		value=-value;
	}
	// 18446744073709551616.0 is 2^64: the first magnitude that no longer fits in sh_uint64.
	if (value>=18446744073709551616.0) {
		return sh_log_send_string("overflow");
	}
	sh_uint64 integer_part=(sh_uint64)value;
	double fractional_part=value-(double)integer_part;
	SH_STATUS status=sh_log_send_uintn(integer_part);
	if (status!=SH_STATUS_SUCCESS) return status;
	status=sh_log_send_byte('.');
	if (status!=SH_STATUS_SUCCESS) return status;
	for (int i=0;i<6;i++) {
		// Shift the next decimal digit into the integer position and peel it off.
		fractional_part*=10.0;
		sh_uint64 digit=(sh_uint64)fractional_part;
		status=sh_log_send_byte('0'+(sh_uint8)digit);
		if (status!=SH_STATUS_SUCCESS) return status;
		fractional_part-=(double)digit;
	}
	return SH_STATUS_SUCCESS;
}
// Formats and sends one log record: "[Shelter:<Source>@<Type>] <tsc> : <message>".
// No trailing newline is sent.
//
// Returns SH_STATUS_INVALID_PARAMETER when `payload` is null, when its output type or
// source fails validation, or when a record that would be printed has a null message.
// Returns SH_STATUS_SUCCESS without sending anything when the record comes from a
// non-test source and its type is below kernel_log_level.
SH_STATUS sh_log_send_payload(sh_log_OUTPUT_PAYLOAD *payload) {
	if (payload==SH_NULLPTR) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	if (!(sh_log_output_type_valid(payload->output_type))) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	if (!(sh_log_output_source_valid(payload->output_source))) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	// Records from the test source bypass the log-level filter.
	if (payload->output_source!=SH_LOG_SOURCE_TEST && payload->output_type<kernel_log_level) {
		return SH_STATUS_SUCCESS;
	}
	// Checked before the first byte goes out so a bad record never leaves a
	// half-written prefix on the serial line.
	if (payload->message_pointer==SH_NULLPTR) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	const char *source_tag=SH_NULLPTR;
	if (payload->output_source==SH_LOG_SOURCE_MAIN) {
		source_tag="Main@";
	} else if (payload->output_source==SH_LOG_SOURCE_CONF) {
		source_tag="Conf@";
	} else if (payload->output_source==SH_LOG_SOURCE_PAGE) {
		source_tag="Page@";
	} else if (payload->output_source==SH_LOG_SOURCE_SLAB) {
		source_tag="Slab@";
	} else if (payload->output_source==SH_LOG_SOURCE_TEST) {
		source_tag="Test@";
	}
	const char *type_tag=SH_NULLPTR;
	if (payload->output_type==SH_LOG_DEBUG) {
		type_tag="Debug] ";
	} else if (payload->output_type==SH_LOG_LOG) {
		type_tag="Log] ";
	} else if (payload->output_type==SH_LOG_WARNING) {
		type_tag="Warning] ";
	} else if (payload->output_type==SH_LOG_ERROR) {
		type_tag="Error] ";
	} else if (payload->output_type==SH_LOG_CRITICAL) {
		type_tag="Critical] ";
	} else if (payload->output_type==SH_LOG_FATAL) {
		type_tag="Fatal] ";
	} else if (payload->output_type==SH_LOG_TEST) {
		type_tag="Test] ";
	}
	sh_log_send_string("[Shelter:");
	// A source or type without a tag here is simply left out of the prefix.
	if (source_tag!=SH_NULLPTR) {
		sh_log_send_string(source_tag);
	}
	if (type_tag!=SH_NULLPTR) {
		sh_log_send_string(type_tag);
	}
	sh_log_send_uintn(payload->tsc_value);
	sh_log_send_string(" : ");
	sh_log_send_string(payload->message_pointer);
	return SH_STATUS_SUCCESS;
}
// Stores `log_level` as the new kernel log threshold. Always returns SH_STATUS_SUCCESS.
SH_STATUS sh_log_load_log_level(sh_uint8 log_level) {
	SH_STATUS result=SH_STATUS_SUCCESS;
	kernel_log_level=log_level;
	return result;
}
// Returns the kernel log threshold last stored by sh_log_load_log_level().
sh_uint8 sh_log_get_log_level() {
	sh_uint8 current_level=kernel_log_level;
	return current_level;
}
SH_STATUS sh_log_test(const char* str) {
if (str==SH_NULLPTR) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_TEST,
.output_source=SH_LOG_SOURCE_TEST,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
sh_log_send_string("\n");
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_debug(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_DEBUG,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
sh_log_send_string("\n");
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_log(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_LOG,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
sh_log_send_string("\n");
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_warning(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_WARNING,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
sh_log_send_string("\n");
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_error(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_ERROR,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
sh_log_send_string("\n");
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_critical(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_CRITICAL,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
sh_log_send_string("\n");
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_fatal(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_FATAL,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
sh_log_send_string("\n");
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_ltest(const char* str) {
if (str==SH_NULLPTR) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_TEST,
.output_source=SH_LOG_SOURCE_TEST,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_ldebug(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_DEBUG,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_llog(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_LOG,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_lwarning(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_WARNING,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_lerror(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_ERROR,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_lcritical(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_CRITICAL,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
return SH_STATUS_SUCCESS;
}
SH_STATUS sh_log_lfatal(const char* str,sh_log_OUTPUT_SOURCE source) {
if (str==SH_NULLPTR || !(sh_log_output_source_valid(source))) {
return SH_STATUS_INVALID_PARAMETER;
}
sh_log_OUTPUT_PAYLOAD payload={
.output_type=SH_LOG_FATAL,
.output_source=source,
.tsc_value=sh_tsc_get_kernel_current_tsc(),
.message_pointer=str
};
SH_STATUS status=sh_log_send_payload(&payload);
if (sh_status_error(status)) {
return status;
}
return SH_STATUS_SUCCESS;
}
// Fetches the page allocator statistics and, when the current log level is 1 or
// lower, prints them as a series of LOG records attributed to `source`.
// Byte and page totals are shown in decimal and hexadecimal, ratios as percentages.
// Always returns SH_STATUS_SUCCESS.
SH_STATUS sh_log_mem_stats(sh_log_OUTPUT_SOURCE source) {
	sh_page_MEM_STATS stats;
	sh_page_get_memory_stats(&stats);
	// Nothing is printed above log level 1.
	if (sh_log_get_log_level()>1) {
		return SH_STATUS_SUCCESS;
	}

	sh_log_llog("Total memory installed (bytes) : ",source);
	sh_log_send_uintn(stats.memory_total_bytes);
	sh_log_send_string(" / 0x");
	sh_log_send_uintn_hex(stats.memory_total_bytes);
	sh_log_send_string("\n");

	sh_log_llog("Total memory installed (pages) : ",source);
	sh_log_send_uintn(stats.memory_total_pages);
	sh_log_send_string(" / 0x");
	sh_log_send_uintn_hex(stats.memory_total_pages);
	sh_log_send_string("\n");

	sh_log_llog("Free memory : ",source);
	sh_log_send_double(stats.free_ratio*100);
	sh_log_send_string("%\n");

	sh_log_llog("Used memory : ",source);
	sh_log_send_double(stats.used_ratio*100);
	sh_log_send_string("%\n");

	sh_log_llog("Free pages : ",source);
	sh_log_send_uintn(stats.free_pages);
	sh_log_send_string("\n");

	sh_log_llog("Used pages : ",source);
	sh_log_send_uintn(stats.used_pages);
	sh_log_send_string("\n");

	sh_log_llog("Largest free block (pages) : ",source);
	sh_log_send_uintn(stats.largest_free_block);
	sh_log_send_string("\n");

	sh_log_llog("Largest used block (pages) : ",source);
	sh_log_send_uintn(stats.largest_used_block);
	sh_log_send_string("\n");

	sh_log_llog("Free block count : ",source);
	sh_log_send_uintn(stats.free_blocks_count);
	sh_log_send_string("\n");

	sh_log_llog("Used block count : ",source);
	sh_log_send_uintn(stats.used_blocks_count);
	sh_log_send_string("\n");

	sh_log_llog("Total memory taken by physical bitmap (bytes) : ",source);
	sh_log_send_uintn(stats.physical_bitmap_size_bytes);
	sh_log_send_string(" / 0x");
	sh_log_send_uintn_hex(stats.physical_bitmap_size_bytes);
	sh_log_send_string("\n");

	sh_log_llog("Total memory taken by physical bitmap (pages) : ",source);
	sh_log_send_uintn(stats.physical_bitmap_size_pages);
	sh_log_send_string(" / 0x");
	sh_log_send_uintn_hex(stats.physical_bitmap_size_pages);
	sh_log_send_string("\n");

	return SH_STATUS_SUCCESS;
}

721
page.c Normal file
View File

@@ -0,0 +1,721 @@
#include "../include/page.h"
#include <stdint.h>
// 64 KiB buffer holding the copy of the boot memory map; filled by
// sh_page_copy_memory_map() and inspected by the check/dump helpers.
__attribute__((section(".bss")))
static sh_uint8 memory_map_buffer[64*1024];
// Allocation bitmap for physical pages, read by the *_na search helpers.
// NOTE(review): where this pointer is assigned is not visible in this part of the file.
static sh_uint8 *physical_bitmap;
// Physical memory size in pages, returned by sh_page_get_physical_memory_amount_pages().
__attribute__((section(".bss")))
static sh_uint64 physical_memory_pages_count=0;
// Physical memory size in bytes, returned by sh_page_get_physical_memory_amount_bytes().
__attribute__((section(".bss")))
static sh_uint64 physical_memory_bytes_count=0;
// Presumably the size of physical_bitmap in bytes - TODO confirm against the code that sets it.
__attribute__((section(".bss")))
static sh_uint64 physical_bitmap_size_bytes=0;
// Presumably the size of physical_bitmap in pages - TODO confirm against the code that sets it.
__attribute__((section(".bss")))
static sh_uint64 physical_bitmap_size_pages=0;
// Virtual address of the boot page table pool; set by sh_page_load_boot_ptp_va().
static sh_page_VIRTUAL_ADRESS page_table_pool_va_ptr=SH_PAGE_NULL_VA;
// Records `pt_pool_va` as the boot page table pool address, logs it, and reads one
// byte from it as a probe: if the address is not accessible the read faults before
// the confirmation message is printed. Always returns SH_STATUS_SUCCESS.
SH_STATUS sh_page_load_boot_ptp_va(sh_page_VIRTUAL_ADRESS pt_pool_va) {
	page_table_pool_va_ptr=pt_pool_va;
	sh_log_ldebug("Page table pool VA: 0x",SH_LOG_SOURCE_PAGE);
	sh_log_send_uintn_hex((sh_uint64)page_table_pool_va_ptr);
	sh_log_send_string("\n");
	// The value is never used, so a plain load could be removed by the compiler.
	// The volatile access forces the read to actually happen.
	volatile sh_uint8 first_byte=*(volatile sh_uint8*)(page_table_pool_va_ptr);
	(void)first_byte;
	sh_log_debug("If you can see this message, no fault happened.",SH_LOG_SOURCE_PAGE);
	return SH_STATUS_SUCCESS;
}
// Returns the boot page table pool address stored by sh_page_load_boot_ptp_va().
sh_page_VIRTUAL_ADRESS sh_page_get_boot_ptp_va() {
	sh_page_VIRTUAL_ADRESS pool_va=page_table_pool_va_ptr;
	return pool_va;
}
// Calls sh_mem_copy() with memory_map_buffer, the address SH_PAGE_MEMORY_MAP_VA and
// the full buffer size (presumably destination, source, length - confirm against
// sh_mem_copy) and returns its status.
SH_STATUS sh_page_copy_memory_map() {
	void *boot_map=(void*)SH_PAGE_MEMORY_MAP_VA;
	SH_STATUS copy_status=sh_mem_copy(memory_map_buffer,boot_map,sizeof(memory_map_buffer));
	return copy_status;
}
SH_STATUS sh_page_check_memory_map() {
static const sh_uint8 memory_map_sig[8]={'S','h','e','M','m','a','p','B'};
if (sh_mem_compare(memory_map_sig,memory_map_buffer,sizeof(memory_map_sig))==SH_STATUS_MEM_NOT_EQUAL) {
sh_log_critical("Memory map doesn't have signature on.",SH_LOG_SOURCE_PAGE);
return SH_STATUS_INVALID_SIGNATURE;
}
sh_page_MEMORY_MAP_HEADER *memory_map_header=(sh_page_MEMORY_MAP_HEADER *)memory_map_buffer;
sh_log_ldebug("Memory map entry count: ",SH_LOG_SOURCE_PAGE);
sh_log_send_uintn((sh_uint64)memory_map_header->entry_count);
sh_log_send_string("\n");
sh_log_ldebug("Memory map entry size: ",SH_LOG_SOURCE_PAGE);
sh_log_send_uintn((sh_uint64)memory_map_header->entry_size);
sh_log_send_string("\n");
sh_log_ldebug("Memory map syntax version: ",SH_LOG_SOURCE_PAGE);
sh_log_send_uintn((sh_uint64)memory_map_header->mmap_syntax_version);
sh_log_send_string("\n");
if (memory_map_header->entry_count*memory_map_header->entry_size+sizeof(sh_page_MEMORY_MAP_HEADER)>sizeof(memory_map_buffer)) {
sh_log_error("Memory map overflow allocated buffer.",SH_LOG_SOURCE_PAGE);
return SH_STATUS_MMAP_BUFFER_OVERFLOW;
}
return SH_STATUS_SUCCESS;
}
void sh_page_dump_memory_map() {
sh_page_MEMORY_MAP_HEADER *memory_map_header=(sh_page_MEMORY_MAP_HEADER *)memory_map_buffer;
sh_log_send_string("Memory map dump:\n");
sh_log_send_string("Header:\n");
for (sh_uint64 i=0;i<sizeof(sh_page_MEMORY_MAP_HEADER);++i) {
sh_log_send_uintn((sh_uint64)memory_map_buffer[i]);
sh_log_send_string(" ");
}
sh_log_send_string("\n");
for (sh_uint64 i=0;i<memory_map_header->entry_count;++i) {
sh_log_send_string("Entry number ");
sh_log_send_uintn(i);
sh_log_send_string(" : ");
for (sh_uint64 y=0;y<sizeof(sh_page_MEMORY_MAP_ENTRY);++y) {
sh_log_send_uintn((sh_uint64)memory_map_buffer[sizeof(sh_page_MEMORY_MAP_HEADER)+i*sizeof(sh_page_MEMORY_MAP_ENTRY)+y]);
sh_log_send_string(" ");
}
sh_log_send_string("\n");
}
}
// Returns the current value of physical_memory_pages_count.
sh_uint64 sh_page_get_physical_memory_amount_pages() {
	sh_uint64 pages=physical_memory_pages_count;
	return pages;
}
// Returns the current value of physical_memory_bytes_count.
sh_uint64 sh_page_get_physical_memory_amount_bytes() {
	sh_uint64 bytes=physical_memory_bytes_count;
	return bytes;
}
sh_uint64 sh_page_get_one_page_na() {
sh_uint64 page_count=physical_memory_pages_count;
sh_uint64 bitmap_word_count=(page_count+63)/64;
for (sh_uint64 word=0;word<bitmap_word_count;word++) {
sh_uint64 value=physical_bitmap[word];
if (value==0xFFFFFFFFFFFFFFFFULL) {
continue;
}
for (sh_uint64 bit=0;bit<64;bit++) {
sh_uint64 page_index=(word*64)+bit;
if (page_index>=page_count) {
return 0;
}
if ((value & (1ULL<<bit))==0) {
return page_index*SH_PAGE_SIZE;
}
}
}
return 0;
}
// Sets (state != 0) or clears (state == 0) the bits of `page_count` consecutive
// pages starting at `page_index` in `bitmap`. Page p is bit (p%8) of byte (p/8).
// Returns SH_STATUS_INVALID_PARAMETER when `bitmap` is null or the range does not
// fit inside the `page_count_in_bitmap` pages covered by the bitmap,
// SH_STATUS_SUCCESS otherwise.
SH_STATUS sh_page_set_pages_range_bitmap(sh_uint8 *bitmap,sh_uint64 page_count_in_bitmap,sh_uint64 page_index,sh_uint64 page_count,sh_bool state) {
	if (bitmap==SH_NULLPTR) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	// Written as a subtraction so a huge page_index+page_count cannot wrap around
	// and slip past the bounds check.
	if (page_index>page_count_in_bitmap || page_count>page_count_in_bitmap-page_index) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	for (sh_uint64 i=0;i<page_count;++i) {
		sh_uint64 page=page_index+i;
		sh_uint64 byte_index=page/8;
		sh_uint8 bit_index=page%8;
		if (state) {
			bitmap[byte_index]|=(sh_uint8)(1u<<bit_index);
		} else {
			bitmap[byte_index]&=(sh_uint8)~(1u<<bit_index);
		}
	}
	return SH_STATUS_SUCCESS;
}
// Initialises `page_table_pool`: stores the pool's physical and virtual base
// addresses, sets the page and bitmap-word counts from the SH_PAGE_PTP_ALLOCATOR_*
// constants, clears the allocation bitmap, then marks the first
// `initial_fill_level` pages as allocated.
// Returns SH_STATUS_INVALID_PARAMETER for a null pool, SH_STATUS_PT_POOL_NO_BITMAP_INIT
// or SH_STATUS_PT_POOL_NO_PAGE_SET when the respective step fails, else SH_STATUS_SUCCESS.
SH_STATUS sh_page_init_ptp(sh_page_PHYSICAL_ADRESS ptp_pa,sh_page_VIRTUAL_ADRESS ptp_va,sh_uint64 initial_fill_level,sh_page_PAGE_TABLE_POOL *page_table_pool) {
	// Same null guard the other page table pool functions apply.
	if (page_table_pool==SH_NULLPTR) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	page_table_pool->page_table_pa=ptp_pa;
	page_table_pool->page_table_va=ptp_va;
	page_table_pool->ptp_pages_count=SH_PAGE_PTP_ALLOCATOR_PAGES_COUNT;
	page_table_pool->ptp_alloc_bitmap_uint64_count=SH_PAGE_PTP_ALLOCATOR_BITMAP_UINT64;
	SH_STATUS status=sh_mem_set_8((sh_uint8*)page_table_pool->ptp_alloc_bitmap,SH_FALSE,sizeof(page_table_pool->ptp_alloc_bitmap));
	if (sh_status_error(status)) {
		sh_log_error("Error: couldn't initialize page table pool bitmap.",SH_LOG_SOURCE_PAGE);
		return SH_STATUS_PT_POOL_NO_BITMAP_INIT;
	}
	if (initial_fill_level!=0) {
		status=sh_page_set_pages_range_bitmap((sh_uint8*)page_table_pool->ptp_alloc_bitmap,page_table_pool->ptp_pages_count,0,initial_fill_level,SH_TRUE);
		if (sh_status_error(status)) {
			sh_log_error("Error: couldn't initialize pages tables already alocated.",SH_LOG_SOURCE_PAGE);
			return SH_STATUS_PT_POOL_NO_PAGE_SET;
		}
	}
	return SH_STATUS_SUCCESS;
}
// Prints every 64-bit word of the pool's allocation bitmap as " 0x<hex>" on a
// single line, followed by a newline.
// Returns SH_STATUS_INVALID_PARAMETER when `ptp` is null, SH_STATUS_SUCCESS otherwise.
SH_STATUS sh_page_dump_ptp_bitmap(sh_page_PAGE_TABLE_POOL *ptp) {
	// Same null guard the other page table pool functions apply.
	if (ptp==SH_NULLPTR) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	for (sh_uint64 i=0;i<ptp->ptp_alloc_bitmap_uint64_count;++i) {
		sh_log_send_string(" 0x");
		sh_log_send_uintn_hex(ptp->ptp_alloc_bitmap[i]);
	}
	sh_log_send_string("\n");
	return SH_STATUS_SUCCESS;
}
// Allocates one page from the page table pool: finds the first clear bit in the
// pool's allocation bitmap, sets it, and returns the page's physical address
// (pool base + index * SH_PAGE_SIZE).
// Returns 0 when `pt_pool` is null or the pool has no free page. Callers in this
// file treat 0 as "no page", so a status code must never be returned here.
sh_page_PHYSICAL_ADRESS sh_page_ptp_alloc_one_page(sh_page_PAGE_TABLE_POOL *pt_pool) {
	if (pt_pool==SH_NULLPTR) {
		// The return type is an address: report failure as 0, not as a status value
		// that callers would mistake for a valid page.
		return 0;
	}
	sh_uint64 page_count=pt_pool->ptp_pages_count;
	sh_uint64 bitmap_word_count=(page_count+63)/64;
	for (sh_uint64 word=0;word<bitmap_word_count;word++) {
		sh_uint64 value=pt_pool->ptp_alloc_bitmap[word];
		// All 64 pages of this word are taken.
		if (value==0xFFFFFFFFFFFFFFFFULL) {
			continue;
		}
		for (sh_uint64 bit=0;bit<64;bit++) {
			sh_uint64 page_index=(word*64)+bit;
			// Padding bits after the last real page are not allocatable.
			if (page_index>=page_count) {
				return 0;
			}
			if ((value & (1ULL<<bit))==0) {
				pt_pool->ptp_alloc_bitmap[word]|=(1ULL<<bit);
				sh_page_PHYSICAL_ADRESS pa=pt_pool->page_table_pa+page_index*SH_PAGE_SIZE;
				return pa;
			}
		}
	}
	return 0;
}
// Maps the page at virtual address `va` to physical address `pa` in the page tables
// owned by `ptp`, creating the intermediate tables (PDPT, PD, PT) from the pool
// when they are missing.
// Returns SH_STATUS_INVALID_PARAMETER for a null pool or unaligned addresses,
// SH_STATUS_OUT_OF_MEMORY when the pool has no page left for a new table,
// SH_STATUS_INVALID_INTERNAL_PA when a table's physical address cannot be translated,
// and SH_STATUS_SUCCESS once the final entry is written.
// An existing mapping for `va` is overwritten without notice.
// NOTE(review): a present entry is always followed as a pointer to the next table;
// entries describing large pages are not distinguished here - confirm none can exist.
SH_STATUS sh_page_map_one_page_ptp(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va,sh_page_PHYSICAL_ADRESS pa,sh_uint64 flags) {
if (ptp==SH_NULLPTR) return SH_STATUS_INVALID_PARAMETER;
if (va%SH_PAGE_SIZE!=0 || pa%SH_PAGE_SIZE!=0) return SH_STATUS_INVALID_PARAMETER;
// Each level is indexed by 9 bits of the virtual address (512 entries per table).
sh_uint64 pml4_i=(va>>39) & 0x1FF;
sh_uint64 pdpt_i=(va>>30) & 0x1FF;
sh_uint64 pd_i=(va>>21) & 0x1FF;
sh_uint64 pt_i=(va>>12) & 0x1FF;
sh_uint64 *pdpt;
sh_uint64 *pd;
sh_uint64 *pt;
// The top-level table sits at the pool's base virtual address.
sh_uint64 *pml4=(sh_uint64*)ptp->page_table_va;
// Level 1: locate the PDPT, allocating and zeroing a new one if the entry is absent.
if (!(pml4[pml4_i] & SH_PAGE_PRESENT)) {
sh_page_PHYSICAL_ADRESS pdpt_pa=sh_page_ptp_alloc_one_page(ptp);
if (!pdpt_pa) return SH_STATUS_OUT_OF_MEMORY;
pdpt=sh_page_ptp_pa_to_va(ptp,pdpt_pa);
if (!pdpt) return SH_STATUS_INVALID_INTERNAL_PA;
sh_mem_set_8((sh_uint8*)pdpt,0,SH_PAGE_SIZE);
pml4[pml4_i]=pdpt_pa | SH_PAGE_TABLE_FLAGS | SH_PAGE_PRESENT;
} else {
// Mask off the low 12 flag bits to recover the table's physical address.
pdpt=sh_page_ptp_pa_to_va(ptp,(pml4[pml4_i] & ~0xFFFULL));
if (!pdpt) return SH_STATUS_INVALID_INTERNAL_PA;
}
// Level 2: locate or create the page directory.
if (!(pdpt[pdpt_i] & SH_PAGE_PRESENT)) {
sh_page_PHYSICAL_ADRESS pd_pa=sh_page_ptp_alloc_one_page(ptp);
if (!pd_pa) return SH_STATUS_OUT_OF_MEMORY;
pd=sh_page_ptp_pa_to_va(ptp,pd_pa);
if (!pd) return SH_STATUS_INVALID_INTERNAL_PA;
sh_mem_set_8((sh_uint8*)pd,0,SH_PAGE_SIZE);
pdpt[pdpt_i]=pd_pa | SH_PAGE_TABLE_FLAGS | SH_PAGE_PRESENT;
} else {
pd=sh_page_ptp_pa_to_va(ptp,(pdpt[pdpt_i] & ~0xFFFULL));
if (!pd) return SH_STATUS_INVALID_INTERNAL_PA;
}
// Level 3: locate or create the page table.
if (!(pd[pd_i] & SH_PAGE_PRESENT)) {
sh_page_PHYSICAL_ADRESS pt_pa=sh_page_ptp_alloc_one_page(ptp);
if (!pt_pa) return SH_STATUS_OUT_OF_MEMORY;
pt=sh_page_ptp_pa_to_va(ptp,pt_pa);
if (!pt) return SH_STATUS_INVALID_INTERNAL_PA;
sh_mem_set_8((sh_uint8*)pt,0,SH_PAGE_SIZE);
pd[pd_i]=pt_pa | SH_PAGE_TABLE_FLAGS | SH_PAGE_PRESENT;
} else {
pt=sh_page_ptp_pa_to_va(ptp,pd[pd_i] & ~0xFFFULL);
if (!pt) return SH_STATUS_INVALID_INTERNAL_PA;
}
// Final entry: page frame address combined with the caller's flags, marked present.
pt[pt_i]=(pa & ~0xFFFULL) | flags | SH_PAGE_PRESENT;
return SH_STATUS_SUCCESS;
}
// Walks the four page-table levels of `ptp` for `va` and reports whether a present
// final entry exists.
// Returns SH_STATUS_VA_MAPPED or SH_STATUS_VA_NOT_MAPPED, SH_STATUS_INVALID_PARAMETER
// for a null pool, or SH_STATUS_INVALID_INTERNAL_PA when a table address stored in
// an entry cannot be translated by sh_page_ptp_pa_to_va().
SH_STATUS sh_page_is_va_mapped_ptp(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va) {
	if (ptp==SH_NULLPTR) return SH_STATUS_INVALID_PARAMETER;

	// Level 1: PML4 entry, indexed by bits 39..47 of the address.
	sh_uint64 *level4=(sh_uint64*)ptp->page_table_va;
	sh_uint64 level4_entry=level4[(va>>39) & 0x1FF];
	if (!(level4_entry & SH_PAGE_PRESENT)) return SH_STATUS_VA_NOT_MAPPED;
	sh_uint64 *level3=sh_page_ptp_pa_to_va(ptp,(level4_entry & ~0xFFFULL));
	if (level3==0) return SH_STATUS_INVALID_INTERNAL_PA;

	// Level 2: PDPT entry, bits 30..38.
	sh_uint64 level3_entry=level3[(va>>30) & 0x1FF];
	if (!(level3_entry & SH_PAGE_PRESENT)) return SH_STATUS_VA_NOT_MAPPED;
	sh_uint64 *level2=sh_page_ptp_pa_to_va(ptp,(level3_entry & ~0xFFFULL));
	if (level2==0) return SH_STATUS_INVALID_INTERNAL_PA;

	// Level 3: page directory entry, bits 21..29.
	sh_uint64 level2_entry=level2[(va>>21) & 0x1FF];
	if (!(level2_entry & SH_PAGE_PRESENT)) return SH_STATUS_VA_NOT_MAPPED;
	sh_uint64 *level1=sh_page_ptp_pa_to_va(ptp,(level2_entry & ~0xFFFULL));
	if (level1==0) return SH_STATUS_INVALID_INTERNAL_PA;

	// Level 4: page table entry, bits 12..20.
	sh_uint64 level1_entry=level1[(va>>12) & 0x1FF];
	if (level1_entry & SH_PAGE_PRESENT) {
		return SH_STATUS_VA_MAPPED;
	}
	return SH_STATUS_VA_NOT_MAPPED;
}
// Classifies the page-aligned range [va, va+size_bytes) by how many of its pages
// sh_page_is_va_mapped_ptp() reports as mapped: none -> SH_STATUS_VA_NOT_MAPPED,
// all -> SH_STATUS_VA_FULLY_MAPPED, otherwise SH_STATUS_VA_PARTIALLY_MAPPED.
// Any per-page result other than SH_STATUS_VA_MAPPED counts as not mapped.
// Returns SH_STATUS_INVALID_PARAMETER for a null pool, an unaligned address or size,
// or a zero size.
SH_STATUS sh_page_is_va_range_mapped_ptp(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va,sh_uint64 size_bytes) {
	if (ptp==SH_NULLPTR) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	if (va%SH_PAGE_SIZE!=0 || size_bytes%SH_PAGE_SIZE!=0 || size_bytes==0) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	sh_uint64 total_pages=size_bytes/SH_PAGE_SIZE;
	sh_uint64 mapped_pages=0;
	sh_uint64 page=0;
	while (page<total_pages) {
		SH_STATUS page_status=sh_page_is_va_mapped_ptp(ptp,va+page*SH_PAGE_SIZE);
		if (page_status==SH_STATUS_VA_MAPPED) {
			mapped_pages+=1;
		}
		page++;
	}
	if (mapped_pages==0) {
		return SH_STATUS_VA_NOT_MAPPED;
	}
	if (mapped_pages==total_pages) {
		return SH_STATUS_VA_FULLY_MAPPED;
	}
	return SH_STATUS_VA_PARTIALLY_MAPPED;
}
// Scans [range_base, range_base+range_size_bytes) page by page for the first run of
// unmapped pages that is `size_bytes` long and stores its start in `*adress_found`.
// Returns SH_STATUS_SUCCESS when a run is found, SH_STATUS_OUT_OF_MEMORY when the
// range holds no run of that length, SH_STATUS_INVALID_INTERNAL_PA when a page
// lookup returns anything other than mapped/not-mapped, and
// SH_STATUS_INVALID_PARAMETER for null pointers, a zero size, or values that are
// not multiples of SH_PAGE_SIZE.
SH_STATUS sh_page_search_available_va_range(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS range_base,sh_page_VIRTUAL_ADRESS range_size_bytes,sh_uint64 size_bytes,sh_page_VIRTUAL_ADRESS *adress_found) {
	// NOTE(review): the "1\n"/"2\n"/"3\n" traces below look like leftover debugging
	// output; they are kept unchanged - confirm whether they can be removed.
	if (ptp==SH_NULLPTR || adress_found==SH_NULLPTR) {
		sh_log_send_string("1\n");
		return SH_STATUS_INVALID_PARAMETER;
	}
	if (size_bytes==0 || size_bytes%SH_PAGE_SIZE!=0) {
		sh_log_send_string("2\n");
		return SH_STATUS_INVALID_PARAMETER;
	}
	// Alignment is checked against SH_PAGE_SIZE everywhere instead of a literal 4096.
	if (range_base%SH_PAGE_SIZE!=0 || range_size_bytes%SH_PAGE_SIZE!=0 || range_size_bytes==0) {
		sh_log_send_string("3\n");
		return SH_STATUS_INVALID_PARAMETER;
	}
	sh_uint64 pages_needed=size_bytes/SH_PAGE_SIZE;
	sh_page_VIRTUAL_ADRESS current_va=range_base;
	sh_uint64 contiguous=0;
	sh_page_VIRTUAL_ADRESS candidate_start=0;
	while (current_va<range_base+range_size_bytes) {
		SH_STATUS status=sh_page_is_va_mapped_ptp(ptp,current_va);
		if (status==SH_STATUS_VA_NOT_MAPPED) {
			// First free page of a new run becomes the candidate start.
			if (contiguous==0) {
				candidate_start=current_va;
			}
			contiguous++;
			if (contiguous==pages_needed) {
				*adress_found=(sh_page_VIRTUAL_ADRESS)candidate_start;
				return SH_STATUS_SUCCESS;
			}
		} else if (status==SH_STATUS_VA_MAPPED) {
			// A mapped page breaks the run.
			contiguous=0;
		} else {
			return SH_STATUS_INVALID_INTERNAL_PA;
		}
		current_va+=SH_PAGE_SIZE;
	}
	return SH_STATUS_OUT_OF_MEMORY;
}
// Maps `size_bytes` of contiguous physical memory starting at `pa` to the virtual
// range starting at `va`, one page at a time, with the same `flags` for every page.
// The target range must be completely unmapped: a partially or fully mapped range is
// rejected with SH_STATUS_ERROR_VA_PARTIALLY_MAPPED / SH_STATUS_ERROR_VA_FULLY_MAPPED.
// Returns SH_STATUS_INVALID_PARAMETER for a null pool, unaligned or zero arguments,
// or flags containing SH_PAGE_PS; otherwise the first failing status from
// sh_page_map_one_page_ptp(), or SH_STATUS_SUCCESS. Pages mapped before a failure
// are left mapped.
SH_STATUS sh_page_map_contiguous_pages_range_ptp(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va,sh_page_PHYSICAL_ADRESS pa,sh_uint64 flags,sh_uint64 size_bytes) {
	if (ptp==SH_NULLPTR) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	if (va%SH_PAGE_SIZE!=0 || pa%SH_PAGE_SIZE!=0 || size_bytes==0 || size_bytes%SH_PAGE_SIZE!=0) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	if ((flags & SH_PAGE_PS)==SH_PAGE_PS) {
		return SH_STATUS_INVALID_PARAMETER;
	}
	sh_uint64 page_total=size_bytes/SH_PAGE_SIZE;
	SH_STATUS range_state=sh_page_is_va_range_mapped_ptp(ptp,va,size_bytes);
	if (range_state==SH_STATUS_VA_PARTIALLY_MAPPED) {
		return SH_STATUS_ERROR_VA_PARTIALLY_MAPPED;
	}
	if (range_state==SH_STATUS_VA_FULLY_MAPPED) {
		return SH_STATUS_ERROR_VA_FULLY_MAPPED;
	}
	sh_uint64 page=0;
	while (page<page_total) {
		SH_STATUS map_status=sh_page_map_one_page_ptp(ptp,va+page*SH_PAGE_SIZE,pa+page*SH_PAGE_SIZE,flags);
		if (map_status!=SH_STATUS_SUCCESS) {
			return map_status;
		}
		page++;
	}
	return SH_STATUS_SUCCESS;
}
// Finds the first run of pages_needed consecutive physical pages that
// sh_page_is_allocated reports as not allocated in physical_bitmap, and stores the
// physical address of the run's first page in *pa. Nothing is reserved by this call.
// Returns SH_STATUS_INVALID_PARAMETER when pa is null or pages_needed is zero,
// SH_STATUS_SUCCESS when a run was found, SH_STATUS_OUT_OF_MEMORY otherwise.
SH_STATUS sh_page_search_physical_contiguous_block_na(sh_uint64 pages_needed,sh_page_PHYSICAL_ADRESS *pa) {
    // pa is written on success, so it must be validated like every other out-pointer here.
    if (pa==SH_NULLPTR || pages_needed==0) return SH_STATUS_INVALID_PARAMETER;
    sh_uint64 page_count=sh_page_get_physical_memory_amount_pages();
    sh_uint64 contiguous=0;
    sh_uint64 candidate_start=0;
    for (sh_uint64 page_index=0;page_index<page_count;page_index++) {
        if (!sh_page_is_allocated(physical_bitmap,page_index)) {
            // Remember where the current free run begins.
            if (contiguous==0) candidate_start=page_index;
            contiguous++;
            if (contiguous==pages_needed) {
                *pa=candidate_start*SH_PAGE_SIZE;
                return SH_STATUS_SUCCESS;
            }
        } else {
            // An allocated page ends the current run.
            contiguous=0;
        }
    }
    return SH_STATUS_OUT_OF_MEMORY;
}
// Allocates size_bytes (rounded up to whole pages) of physically contiguous memory and
// maps it with SH_PAGE_PRESENT | SH_PAGE_NX | SH_PAGE_RW at the first free virtual range
// found in [0x0, 0x00007FFFFFFFF000). The chosen virtual address is stored in *va.
// Returns:
//   SH_STATUS_INVALID_PARAMETER ptp or va is null, or size_bytes is zero
//   SH_STATUS_KERNEL_PANIC      the virtual range search reported SH_STATUS_INVALID_INTERNAL_PA
//   SH_STATUS_OUT_OF_MEMORY     no virtual range or no physical block could be found
//   any error reported while reserving the frames in physical_bitmap or while mapping
//   SH_STATUS_SUCCESS           otherwise
SH_STATUS sh_page_alloc_contiguous(sh_page_PAGE_TABLE_POOL *ptp,sh_uint64 size_bytes,sh_page_VIRTUAL_ADRESS *va) {
    if (ptp==SH_NULLPTR || va==SH_NULLPTR || size_bytes==0) return SH_STATUS_INVALID_PARAMETER;
    // Round the request up to whole pages.
    sh_uint64 pages_needed=size_bytes/SH_PAGE_SIZE;
    if (size_bytes%SH_PAGE_SIZE!=0) pages_needed++;
    sh_page_VIRTUAL_ADRESS candidate_va=0;
    SH_STATUS status=sh_page_search_available_va_range(ptp,0x0,0x00007FFFFFFFF000,pages_needed*SH_PAGE_SIZE,&candidate_va);
    if (status==SH_STATUS_INVALID_INTERNAL_PA) {
        return SH_STATUS_KERNEL_PANIC;
    }
    // Every other failure of the search is reported to the caller as an allocation failure.
    if (status==SH_STATUS_OUT_OF_MEMORY || status==SH_STATUS_INVALID_PARAMETER || sh_status_error(status)) {
        return SH_STATUS_OUT_OF_MEMORY;
    }
    sh_page_PHYSICAL_ADRESS candidate_pa=0;
    status=sh_page_search_physical_contiguous_block_na(pages_needed,&candidate_pa);
    if (status!=SH_STATUS_SUCCESS) {
        return SH_STATUS_OUT_OF_MEMORY;
    }
    sh_uint64 first_page=(sh_uint64)candidate_pa/SH_PAGE_SIZE;
    // Reserve the frames before mapping them, so a bitmap failure is reported instead of
    // returning a mapping whose frames are still seen as free by later searches.
    status=sh_page_set_pages_range_bitmap(physical_bitmap,physical_memory_pages_count,first_page,pages_needed,SH_TRUE);
    if (sh_status_error(status)) return status;
    status=sh_page_map_contiguous_pages_range_ptp(ptp,candidate_va,candidate_pa,SH_PAGE_PRESENT | SH_PAGE_NX | SH_PAGE_RW,pages_needed*SH_PAGE_SIZE);
    if (status!=SH_STATUS_SUCCESS) {
        // Best-effort release of the reservation; the mapping error is what the caller needs.
        sh_page_set_pages_range_bitmap(physical_bitmap,physical_memory_pages_count,first_page,pages_needed,SH_FALSE);
        return status;
    }
    *va=candidate_va;
    return SH_STATUS_SUCCESS;
}
// Same as sh_page_alloc_contiguous, but with caller-selected mapping flags and virtual
// search window. Each DEFAULT parameter may be passed as SH_DEFVALUE to get:
//   flags               SH_PAGE_PRESENT | SH_PAGE_NX | SH_PAGE_RW
//   va_range_start      0x0
//   va_range_size_bytes 0x00007FFFFFFFF000
// Returns:
//   SH_STATUS_INVALID_PARAMETER ptp or va is null, size_bytes is zero, or the window is
//                               empty or not aligned on SH_PAGE_SIZE
//   SH_STATUS_KERNEL_PANIC      the virtual range search reported SH_STATUS_INVALID_INTERNAL_PA
//   SH_STATUS_OUT_OF_MEMORY     no virtual range or no physical block could be found
//   any error reported while reserving the frames in physical_bitmap or while mapping
//   SH_STATUS_SUCCESS           otherwise, with the chosen address stored in *va
SH_STATUS sh_page_alloc_contiguous_extended(sh_page_PAGE_TABLE_POOL *ptp,sh_uint64 size_bytes,sh_page_VIRTUAL_ADRESS* va,DEFAULT sh_uint64 flags,DEFAULT sh_page_VIRTUAL_ADRESS va_range_start,DEFAULT sh_uint64 va_range_size_bytes) {
    if (ptp==SH_NULLPTR || va==SH_NULLPTR || size_bytes==0) return SH_STATUS_INVALID_PARAMETER;
    // Substitute the defaults for every argument left at SH_DEFVALUE.
    if (flags==SH_DEFVALUE) flags=SH_PAGE_PRESENT | SH_PAGE_NX | SH_PAGE_RW;
    if (va_range_start==SH_DEFVALUE) va_range_start=0x0;
    if (va_range_size_bytes==SH_DEFVALUE) va_range_size_bytes=0x00007FFFFFFFF000;
    if (va_range_start%SH_PAGE_SIZE!=0 || va_range_size_bytes==0 || va_range_size_bytes%SH_PAGE_SIZE!=0) return SH_STATUS_INVALID_PARAMETER;
    // Round the request up to whole pages.
    sh_uint64 pages_needed=size_bytes/SH_PAGE_SIZE;
    if (size_bytes%SH_PAGE_SIZE!=0) pages_needed++;
    sh_page_VIRTUAL_ADRESS candidate_va=0;
    SH_STATUS status=sh_page_search_available_va_range(ptp,va_range_start,va_range_size_bytes,pages_needed*SH_PAGE_SIZE,&candidate_va);
    if (status==SH_STATUS_INVALID_INTERNAL_PA) {
        return SH_STATUS_KERNEL_PANIC;
    }
    // Every other failure of the search is reported to the caller as an allocation failure.
    if (status==SH_STATUS_OUT_OF_MEMORY || status==SH_STATUS_INVALID_PARAMETER || sh_status_error(status)) {
        return SH_STATUS_OUT_OF_MEMORY;
    }
    sh_page_PHYSICAL_ADRESS candidate_pa=0;
    status=sh_page_search_physical_contiguous_block_na(pages_needed,&candidate_pa);
    if (status!=SH_STATUS_SUCCESS) {
        return SH_STATUS_OUT_OF_MEMORY;
    }
    sh_uint64 first_page=(sh_uint64)candidate_pa/SH_PAGE_SIZE;
    // Reserve the frames before mapping them, so a bitmap failure is reported instead of
    // returning a mapping whose frames are still seen as free by later searches.
    status=sh_page_set_pages_range_bitmap(physical_bitmap,physical_memory_pages_count,first_page,pages_needed,SH_TRUE);
    if (sh_status_error(status)) return status;
    status=sh_page_map_contiguous_pages_range_ptp(ptp,candidate_va,candidate_pa,flags,pages_needed*SH_PAGE_SIZE);
    if (status!=SH_STATUS_SUCCESS) {
        // Best-effort release of the reservation; the mapping error is what the caller needs.
        sh_page_set_pages_range_bitmap(physical_bitmap,physical_memory_pages_count,first_page,pages_needed,SH_FALSE);
        return status;
    }
    *va=candidate_va;
    return SH_STATUS_SUCCESS;
}
// Clears the last-level page-table entry that translates va in ptp and invalidates the
// TLB entry for va with invlpg.
// Returns SH_STATUS_INVALID_PARAMETER when ptp is null or va is not a multiple of
// SH_PAGE_SIZE, SH_STATUS_ERROR_VA_NOT_MAPPED when one of the three upper-level entries
// lacks SH_PAGE_PRESENT, SH_STATUS_INVALID_INTERNAL_PA when sh_page_ptp_pa_to_va cannot
// translate a table address, SH_STATUS_SUCCESS otherwise.
// The last-level entry is zeroed without checking whether it was present.
SH_STATUS sh_page_unmap_one_page_ptp(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va) {
    if (ptp==SH_NULLPTR || va%SH_PAGE_SIZE!=0) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    sh_uint64 *table=(sh_uint64*)ptp->page_table_va;
    // Walk the three upper levels: index bits start at 39, 30 and 21 (9 bits each).
    for (int shift=39;shift>12;shift-=9) {
        const sh_uint64 entry=table[(va>>shift) & 0x1FF];
        if ((entry & SH_PAGE_PRESENT)==0) {
            return SH_STATUS_ERROR_VA_NOT_MAPPED;
        }
        // The entry holds the physical address of the next table above its low 12 bits.
        table=sh_page_ptp_pa_to_va(ptp,entry & ~0xFFFULL);
        if (!table) {
            return SH_STATUS_INVALID_INTERNAL_PA;
        }
    }
    table[(va>>12) & 0x1FF]=0x0ULL;
    __asm__ volatile("invlpg (%0)" :: "r"(va) : "memory");
    return SH_STATUS_SUCCESS;
}
// Unmaps the page-aligned virtual range [va, va+size_bytes) of ptp, one page at a time
// through sh_page_unmap_one_page_ptp.
// Returns SH_STATUS_INVALID_PARAMETER when ptp is null, va or size_bytes is not a
// multiple of SH_PAGE_SIZE, or size_bytes is zero.
// Returns SH_STATUS_ERROR_VA_PARTIALLY_MAPPED / SH_STATUS_ERROR_VA_NOT_MAPPED when the
// range is not fully mapped. The first failing per-page status is returned as is.
SH_STATUS sh_page_unmap_contiguous_pages_range_ptp(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va,sh_uint64 size_bytes) {
    if (ptp==SH_NULLPTR) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    if (va%SH_PAGE_SIZE!=0) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    if (size_bytes==0 || size_bytes%SH_PAGE_SIZE!=0) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    // Only a range that is mapped from end to end is unmapped.
    const SH_STATUS range_state=sh_page_is_va_range_mapped_ptp(ptp,va,size_bytes);
    if (range_state==SH_STATUS_VA_PARTIALLY_MAPPED) {
        return SH_STATUS_ERROR_VA_PARTIALLY_MAPPED;
    }
    if (range_state==SH_STATUS_VA_NOT_MAPPED) {
        return SH_STATUS_ERROR_VA_NOT_MAPPED;
    }
    sh_page_VIRTUAL_ADRESS next_va=va;
    for (sh_uint64 left=size_bytes/SH_PAGE_SIZE;left!=0;left--) {
        const SH_STATUS unmap_status=sh_page_unmap_one_page_ptp(ptp,next_va);
        if (unmap_status!=SH_STATUS_SUCCESS) {
            return unmap_status;
        }
        next_va+=SH_PAGE_SIZE;
    }
    return SH_STATUS_SUCCESS;
}
// Translates the page-aligned virtual address va to the physical address stored in its
// last-level page-table entry of ptp and writes it to *pa.
// Returns SH_STATUS_INVALID_PARAMETER when ptp or pa is null or va is not a multiple of
// SH_PAGE_SIZE, SH_STATUS_ERROR_VA_NOT_MAPPED when any of the four entries on the walk
// lacks SH_PAGE_PRESENT, SH_STATUS_INVALID_INTERNAL_PA when sh_page_ptp_pa_to_va cannot
// translate a table address, SH_STATUS_SUCCESS otherwise.
SH_STATUS sh_page_ptp_va_to_pa(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va,sh_page_PHYSICAL_ADRESS *pa) {
    if (ptp==SH_NULLPTR || pa==SH_NULLPTR) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    if (va%SH_PAGE_SIZE!=0) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    sh_uint64 *table=(sh_uint64*)ptp->page_table_va;
    // Walk the three upper levels: index bits start at 39, 30 and 21 (9 bits each).
    for (int shift=39;shift>12;shift-=9) {
        const sh_uint64 entry=table[(va>>shift) & 0x1FF];
        if ((entry & SH_PAGE_PRESENT)==0) {
            return SH_STATUS_ERROR_VA_NOT_MAPPED;
        }
        // The entry holds the physical address of the next table above its low 12 bits.
        table=sh_page_ptp_pa_to_va(ptp,entry & ~0xFFFULL);
        if (!table) {
            return SH_STATUS_INVALID_INTERNAL_PA;
        }
    }
    const sh_uint64 leaf=table[(va>>12) & 0x1FF];
    if ((leaf & SH_PAGE_PRESENT)==0) {
        return SH_STATUS_ERROR_VA_NOT_MAPPED;
    }
    // Keep bits 12..51 only: drops the low flag bits and the high bits of the entry.
    *pa=leaf & 0x000FFFFFFFFFF000;
    return SH_STATUS_SUCCESS;
}
// Releases the single page mapped at va: looks up its physical address, unmaps va, then
// updates physical_bitmap for that physical page with SH_FALSE.
// Returns SH_STATUS_INVALID_PARAMETER when ptp is null or va is not a multiple of
// SH_PAGE_SIZE; otherwise the status of the first step that did not return
// SH_STATUS_SUCCESS, or SH_STATUS_SUCCESS when all three steps succeeded.
SH_STATUS sh_page_unalloc_one_page(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va) {
    if (ptp==SH_NULLPTR || va%SH_PAGE_SIZE!=0) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    sh_page_PHYSICAL_ADRESS backing_pa;
    SH_STATUS result=sh_page_ptp_va_to_pa(ptp,va,&backing_pa);
    if (result==SH_STATUS_SUCCESS) {
        // If this call returns SH_STATUS_ERROR_VA_NOT_MAPPED there is a severe bug that
        // should cause a kernel panic: sh_page_ptp_va_to_pa should already have returned
        // that exact error code.
        result=sh_page_unmap_one_page_ptp(ptp,va);
    }
    if (result==SH_STATUS_SUCCESS) {
        result=sh_page_set_pages_range_bitmap(physical_bitmap,physical_memory_pages_count,(sh_uint64)(backing_pa/SH_PAGE_SIZE),1,SH_FALSE);
    }
    return result;
}
// Releases every page of the page-aligned virtual range [va, va+size_bytes) through
// sh_page_unalloc_one_page.
// Returns SH_STATUS_INVALID_PARAMETER when ptp is null, va or size_bytes is not a
// multiple of SH_PAGE_SIZE, or size_bytes is zero.
// Returns SH_STATUS_ERROR_VA_NOT_MAPPED / SH_STATUS_ERROR_VA_PARTIALLY_MAPPED when the
// range is not fully mapped. The first failing per-page status is returned as is.
SH_STATUS sh_page_unalloc_contiguous(sh_page_PAGE_TABLE_POOL *ptp,sh_page_VIRTUAL_ADRESS va,sh_uint64 size_bytes) {
    if (ptp==SH_NULLPTR) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    if (va%SH_PAGE_SIZE!=0) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    if (size_bytes==0 || size_bytes%SH_PAGE_SIZE!=0) {
        return SH_STATUS_INVALID_PARAMETER;
    }
    // Only a range that is mapped from end to end is released.
    const SH_STATUS range_state=sh_page_is_va_range_mapped_ptp(ptp,va,size_bytes);
    if (range_state==SH_STATUS_VA_NOT_MAPPED) {
        return SH_STATUS_ERROR_VA_NOT_MAPPED;
    }
    if (range_state==SH_STATUS_VA_PARTIALLY_MAPPED) {
        return SH_STATUS_ERROR_VA_PARTIALLY_MAPPED;
    }
    sh_page_VIRTUAL_ADRESS next_va=va;
    for (sh_uint64 left=size_bytes/SH_PAGE_SIZE;left!=0;left--) {
        const SH_STATUS release_status=sh_page_unalloc_one_page(ptp,next_va);
        if (release_status!=SH_STATUS_SUCCESS) {
            return release_status;
        }
        next_va+=SH_PAGE_SIZE;
    }
    return SH_STATUS_SUCCESS;
}
// Builds the physical page bitmap from the memory map stored in memory_map_buffer
// (a sh_page_MEMORY_MAP_HEADER followed by entry_count sh_page_MEMORY_MAP_ENTRY records).
// Steps:
//   1. Scan the map to find the highest usable page (conventional segments ending at or
//      below SH_PAGE_MAX_PAGES_COUNT) and the biggest conventional segment.
//   2. Size the bitmap (one bit per page), map it in the kernel permanent VA window onto
//      the start of the biggest conventional segment, and fill it with 0xFF.
//   3. Call sh_page_set_pages_range_bitmap(..., SH_FALSE) on every conventional segment
//      within the page limit, then (..., SH_TRUE) on the pages holding the bitmap itself.
// Sets physical_memory_pages_count, physical_memory_bytes_count, physical_bitmap,
// physical_bitmap_size_bytes and physical_bitmap_size_pages.
// Returns SH_STATUS_INVALID_PARAMETER when ptp is null, SH_STATUS_OUT_OF_MEMORY when no
// conventional segment can hold the bitmap, SH_STATUS_PMAP_NO_PAGES_SET when a segment
// could not be recorded, the failing status of the VA search / mapping / fill steps,
// or SH_STATUS_SUCCESS.
SH_STATUS sh_page_analyse_memory_map(sh_page_PAGE_TABLE_POOL *ptp) {
    if (ptp==SH_NULLPTR) return SH_STATUS_INVALID_PARAMETER;
    sh_page_MEMORY_MAP_HEADER *memory_map_header=(sh_page_MEMORY_MAP_HEADER *)memory_map_buffer;
    sh_page_MEMORY_MAP_ENTRY *memory_map_cursor=(sh_page_MEMORY_MAP_ENTRY *)(memory_map_buffer+sizeof(sh_page_MEMORY_MAP_HEADER));
    sh_bool verbose=sh_log_get_log_level()==0;
    sh_bool log=sh_log_get_log_level()<=1;
    sh_uint64 highest_usable_segment=0;
    sh_uint64 highest_usable_page=0;
    sh_uint64 biggest_segment_index=0;
    sh_uint64 biggest_segment_pages=0;
    if (log) {
        sh_log_llog("Max pages count is currently set to 0x",SH_LOG_SOURCE_PAGE);
        sh_log_send_uintn_hex(SH_PAGE_MAX_PAGES_COUNT);
        sh_log_send_string(" pages or 0x");
        sh_log_send_uintn_hex(SH_PAGE_MAX_PAGES_COUNT*4096);
        sh_log_send_string(" bytes.\n");
    }
    // First loop: identify the amount of memory and the biggest free region.
    for (sh_uint64 i=0;i<memory_map_header->entry_count;i++) {
        sh_uint64 start_page=memory_map_cursor[i].physical_start/4096;
        sh_uint64 end_page=start_page+memory_map_cursor[i].pages_count;
        if (memory_map_cursor[i].type==SH_PAGE_CONVENTIONAL_MEMORY && memory_map_cursor[i].pages_count>biggest_segment_pages) {
            biggest_segment_pages=memory_map_cursor[i].pages_count;
            biggest_segment_index=i;
        }
        if (verbose) {
            sh_log_ldebug("Found memory map segment #",SH_LOG_SOURCE_PAGE);
            sh_log_send_uintn(i);
            sh_log_send_string(": [0x");
            sh_log_send_uintn_hex(start_page*4096);
            sh_log_send_string(" - 0x");
            sh_log_send_uintn_hex(end_page*4096);
            sh_log_send_string("] Memory type: ");
            sh_log_send_uintn(memory_map_cursor[i].type);
            if (memory_map_cursor[i].type==SH_PAGE_CONVENTIONAL_MEMORY) {
                sh_log_send_string(" --> usable\n");
            } else {
                sh_log_send_string(" --> not usable\n");
            }
            if (!(end_page<=SH_PAGE_MAX_PAGES_COUNT)) {
                sh_log_lwarning("Memory map segment #",SH_LOG_SOURCE_PAGE);
                sh_log_send_uintn(i);
                sh_log_send_string(" isn't usable because it overflow over max page count. Enable debug log channel to see more.\n");
            }
        }
        // Only conventional segments that fit under the page limit extend the tracked range.
        if (memory_map_cursor[i].type==SH_PAGE_CONVENTIONAL_MEMORY && end_page<=SH_PAGE_MAX_PAGES_COUNT) {
            if (end_page>highest_usable_page) {
                highest_usable_segment=i;
                highest_usable_page=end_page;
            }
        }
    }
    physical_memory_pages_count=highest_usable_page;
    physical_memory_bytes_count=physical_memory_pages_count*SH_PAGE_SIZE;
    if (verbose) {
        sh_log_ldebug("Total memory was given by memory map segment #",SH_LOG_SOURCE_PAGE);
        sh_log_send_uintn(highest_usable_segment);
        sh_log_send_string("\n");
        sh_log_ldebug("Total memory (pages): 0x",SH_LOG_SOURCE_PAGE);
        sh_log_send_uintn_hex(physical_memory_pages_count);
        sh_log_send_string(". Total memory (bytes) : 0x");
        sh_log_send_uintn_hex(physical_memory_bytes_count);
        sh_log_send_string("\n");
    }
    if (biggest_segment_pages==0) {
        sh_log_error("No suitable conventional memory segment found.",SH_LOG_SOURCE_PAGE);
        return SH_STATUS_OUT_OF_MEMORY;
    }
    // Bitmap size: one bit per tracked page, rounded up to whole bytes, then to whole pages.
    sh_uint64 bitmap_size_bytes=physical_memory_pages_count/8;
    if (physical_memory_pages_count%8!=0) bitmap_size_bytes++;
    sh_uint64 bitmap_size_pages=bitmap_size_bytes/SH_PAGE_SIZE;
    if (bitmap_size_bytes%SH_PAGE_SIZE!=0) bitmap_size_pages++;
    // The bitmap is placed at the start of the biggest segment, so that segment must hold
    // bitmap_size_pages pages. Both sides of the comparison are page counts.
    if (memory_map_cursor[biggest_segment_index].pages_count<bitmap_size_pages) {
        sh_log_error("Memory is too low or too fragmented to allocate physical bitmap.",SH_LOG_SOURCE_PAGE);
        return SH_STATUS_OUT_OF_MEMORY;
    }
    sh_page_PHYSICAL_ADRESS pa=memory_map_cursor[biggest_segment_index].physical_start;
    sh_page_VIRTUAL_ADRESS va;
    physical_bitmap_size_bytes=bitmap_size_bytes;
    physical_bitmap_size_pages=bitmap_size_pages;
    SH_STATUS status=sh_page_search_available_va_range(ptp,SH_PAGE_KERNEL_PERM_VA_BASE,(SH_PAGE_KERNEL_PERM_VA_END-SH_PAGE_KERNEL_PERM_VA_BASE+1-0x1000),physical_bitmap_size_pages*SH_PAGE_SIZE,&va);
    if (status!=SH_STATUS_SUCCESS) {
        sh_log_error("Memory is too low or too fragmented to allocate physical bitmap.",SH_LOG_SOURCE_PAGE);
        return status;
    }
    status=sh_page_map_contiguous_pages_range_ptp(ptp,va,pa,SH_PAGE_PRESENT | SH_PAGE_NX | SH_PAGE_RW,physical_bitmap_size_pages*SH_PAGE_SIZE);
    if (status==SH_STATUS_OUT_OF_MEMORY) {
        sh_log_error("Memory is too low or too fragmented to allocate physical bitmap.",SH_LOG_SOURCE_PAGE);
        return status;
    } else if (status!=SH_STATUS_SUCCESS) {
        sh_log_error("An unknow error happened during physical bitmap pages mapping. See error below",SH_LOG_SOURCE_PAGE);
        return status;
    }
    physical_bitmap=(sh_uint8*)va;
    // Start with every bit set; the second loop then calls the bitmap setter with SH_FALSE
    // on the conventional segments (presumably marking them free).
    status=sh_mem_set_8(physical_bitmap,0xFF,physical_bitmap_size_bytes);
    if (sh_status_error(status)) {
        sh_log_error("An unknow error happened during physical bitmap filling with 0xFF. See error below.",SH_LOG_SOURCE_PAGE);
        return status;
    }
    // Second loop: actually record all free regions into the physical bitmap.
    for (sh_uint64 i=0;i<memory_map_header->entry_count;i++) {
        sh_uint64 start_page=memory_map_cursor[i].physical_start/4096;
        sh_uint64 end_page=start_page+memory_map_cursor[i].pages_count;
        if (end_page<=SH_PAGE_MAX_PAGES_COUNT) {
            if (memory_map_cursor[i].type==SH_PAGE_CONVENTIONAL_MEMORY) {
                status=sh_page_set_pages_range_bitmap(physical_bitmap,physical_memory_pages_count,memory_map_cursor[i].physical_start/SH_PAGE_SIZE,memory_map_cursor[i].pages_count,SH_FALSE);
                if (sh_status_error(status)) {
                    sh_log_error("Couldn't set this memory map segment to usable.",SH_LOG_SOURCE_PAGE);
                    return SH_STATUS_PMAP_NO_PAGES_SET;
                }
            }
        }
    }
    // Finally set the bits of the pages that hold the bitmap itself.
    // NOTE(review): the returned status is ignored, as before; presumably this call fails
    // when the biggest segment lies above the tracked range - confirm whether that case
    // must be handled.
    sh_page_set_pages_range_bitmap(physical_bitmap,physical_memory_pages_count,pa/SH_PAGE_SIZE,physical_bitmap_size_pages,SH_TRUE);
    return SH_STATUS_SUCCESS;
}
// Returns the address stored in physical_bitmap, converted to sh_page_VIRTUAL_ADRESS.
// Declared with (void): in C an empty parameter list leaves the parameters unspecified,
// so calls with stray arguments would not be diagnosed.
sh_page_VIRTUAL_ADRESS sh_page_get_physical_bitmap_ptr(void) {
    return (sh_page_VIRTUAL_ADRESS)physical_bitmap;
}
// Returns the number of bits set in x (0..64), computed with bit-parallel partial sums.
static sh_uint64 popcount64(sh_uint64 x) {
    const sh_uint64 pair_mask=0x5555555555555555ULL;
    const sh_uint64 nibble_mask=0x3333333333333333ULL;
    const sh_uint64 byte_mask=0x0F0F0F0F0F0F0F0FULL;
    // Count per 2-bit group, then per 4-bit group, then per byte.
    sh_uint64 counts=x-((x>>1) & pair_mask);
    counts=(counts & nibble_mask)+((counts>>2) & nibble_mask);
    counts=(counts+(counts>>4)) & byte_mask;
    // Each byte now holds at most 8, so multiplying by 0x01..01 accumulates the eight
    // byte counts into the top byte without any carry between bytes.
    return (counts*0x0101010101010101ULL)>>56;
}
SH_STATUS sh_page_get_memory_stats(sh_page_MEM_STATS *mem_stats) {
if (mem_stats==SH_NULLPTR) return SH_STATUS_INVALID_PARAMETER;
mem_stats->memory_total_pages=physical_memory_pages_count;
mem_stats->memory_total_bytes=physical_memory_bytes_count;
sh_uint64 free_pages=0;
sh_uint64 used_pages=0;
sh_uint64 largest_free_block=0;
sh_uint64 largest_used_block=0;
sh_uint64 free_blocks_count=0;
sh_uint64 used_blocks_count=0;
sh_uint64 current_free_block=0;
sh_uint64 current_used_block=0;
sh_uint64 full_uint64_count=physical_memory_pages_count/64;
sh_uint64 remaining_bits=physical_memory_pages_count%64;
sh_uint64 *bitmap64=(sh_uint64*)physical_bitmap;
for (sh_uint64 i=0;i<full_uint64_count;i++) {
sh_uint64 val=bitmap64[i];
unsigned ones=popcount64(val);
unsigned zeros=64-ones;
used_pages+=ones;
free_pages+=zeros;
for (int b=0;b<64;b++) {
sh_bool bit_set=(val>>b) & 1;
if (bit_set) {
current_used_block++;
if (current_free_block) {
free_blocks_count++;
if (current_free_block>largest_free_block) {
largest_free_block=current_free_block;
}
current_free_block=0;
}
} else {
current_free_block++;
if (current_used_block) {
used_blocks_count++;
if (current_used_block>largest_used_block) {
largest_used_block=current_used_block;
}
current_used_block=0;
}
}
}
}
if (remaining_bits) {
sh_uint64 val=bitmap64[full_uint64_count] & ((1ULL<<remaining_bits)-1);
unsigned ones=popcount64(val);
unsigned zeros=remaining_bits-ones;
used_pages+=ones;
free_pages+=zeros;
for (sh_uint64 b=0;b<remaining_bits;b++) {
sh_bool bit_set=(val>>b) & 1;
if (bit_set) {
current_used_block++;
if (current_free_block) {
free_blocks_count++;
if (current_free_block>largest_free_block) {
largest_free_block=current_free_block;
}
current_free_block=0;
}
} else {
current_free_block++;
if (current_used_block) {
used_blocks_count++;
if (current_used_block>largest_used_block) {
largest_used_block=current_used_block;
}
current_used_block=0;
}
}
}
}
if (current_free_block) {
free_blocks_count++;
if (current_free_block>largest_free_block) {
largest_free_block=current_free_block;
}
}
if (current_used_block) {
used_blocks_count++;
if (current_used_block>largest_used_block) {
largest_used_block=current_used_block;
}
}
mem_stats->free_pages=free_pages;
mem_stats->used_pages=used_pages;
mem_stats->free_ratio=(double)free_pages/(double)physical_memory_pages_count;
mem_stats->used_ratio=(double)used_pages/(double)physical_memory_pages_count;
mem_stats->largest_free_block=largest_free_block;
mem_stats->largest_used_block=largest_used_block;
mem_stats->free_blocks_count=free_blocks_count;
mem_stats->used_blocks_count=used_blocks_count;
mem_stats->physical_bitmap_size_bytes=(physical_memory_pages_count+7)/8;
mem_stats->physical_bitmap_size_pages=(mem_stats->physical_bitmap_size_bytes+4095)/4096;
return SH_STATUS_SUCCESS;
}

2680
test.c Normal file

File diff suppressed because it is too large Load Diff

BIN
test.ccc Normal file

Binary file not shown.

69
test.py Executable file
View File

@@ -0,0 +1,69 @@
import os
import subprocess
import time
def get_source_files(root_dir, max_files=20000):
    """Collect C source/header files under ``root_dir`` and their total size.

    Args:
        root_dir: Directory walked recursively with ``os.walk``.
        max_files: Maximum number of files to collect (default 20000). The walk
            stops as soon as this many files have been gathered.

    Returns:
        A ``(paths, total_size)`` tuple: the list of collected ``.c``/``.h``
        file paths and the sum of their sizes in bytes.
    """
    source_files = []
    total_size = 0
    if max_files <= 0:
        return source_files, total_size
    for root, _, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(('.c', '.h')):
                continue
            path = os.path.join(root, file)
            source_files.append(path)
            total_size += os.path.getsize(path)
            if len(source_files) >= max_files:
                # Cap reached: no need to keep walking the rest of the tree.
                return source_files, total_size
    return source_files, total_size
def main():
    """Compare `tar -cJf` (XZ) with `./ccc` on the C sources found under ./linux.

    Collects the source files with get_source_files, compresses them once with
    tar and once with ./ccc, then prints the size of linux_sources.tar.xz and
    test.ccc relative to the raw source size, plus the time taken by each tool.
    """
    target_dir = "linux"
    if not os.path.exists(target_dir):
        print(f"Erreur: Le dossier {target_dir} n'existe pas.")
        return

    print(f"--- Analyse de {target_dir} ---")
    files, total_raw_size = get_source_files(target_dir)
    raw_mo = total_raw_size / (1024 * 1024)
    print(f"Fichiers trouvés : {len(files)}")
    print(f"Taille totale brute : {raw_mo:.2f} Mo")
    if total_raw_size == 0:
        # Nothing to compress; the ratios below would divide by zero.
        print(f"Erreur: Aucun contenu source (.c/.h) à compresser dans {target_dir}.")
        return

    # 1. Compression with TAR (file list is fed on stdin, one path per line)
    print("\n--- Lancement de TAR -cJf (XZ) ---")
    start_tar = time.time()
    tar_cmd = ["tar", "-cJf", "linux_sources.tar.xz", "--files-from=-"]
    process_tar = subprocess.Popen(tar_cmd, stdin=subprocess.PIPE)
    process_tar.communicate(input="\n".join(files).encode())
    end_tar = time.time()

    # 2. Compression with CCC
    print("\n--- Lancement de CCC (Output temps réel) ---")
    print("-" * 40)
    start_ccc = time.time()
    try:
        # stdout and stderr are left at their defaults so CCC's output stays visible
        subprocess.run(["./ccc"] + files, check=True)
    except subprocess.CalledProcessError as e:
        # A failing CCC run is reported, but the results are still printed below.
        print(f"\nErreur fatale CCC : {e}")
    except OSError as e:
        print(f"\nErreur système (trop de fichiers ?) : {e}")
        return
    end_ccc = time.time()
    print("-" * 40)

    # 3. Final figures
    print("\n" + "="*40)
    print(f" RÉSULTATS (Source: {raw_mo:.2f} Mo)")
    print("="*40)
    for name, filename in [("TAR.XZ", "linux_sources.tar.xz"), ("CCC", "test.ccc")]:
        if os.path.exists(filename):
            size_mo = os.path.getsize(filename) / (1024 * 1024)
            ratio = (size_mo / raw_mo) * 100
            print(f"{name:10} : {size_mo:8.2f} Mo ({ratio:5.2f}% du total)")
        else:
            print(f"{name:10} : Non généré")
    print(f"\nTemps TAR : {end_tar - start_tar:.2f}s")
    print(f"Temps CCC : {end_ccc - start_ccc:.2f}s")


if __name__ == "__main__":
    main()

BIN
test_old.ccc Normal file

Binary file not shown.