improvements incoming

This commit is contained in:
2026-02-06 00:01:23 +01:00
parent f281c71f75
commit 51ae29a898
6 changed files with 236 additions and 201 deletions

View File

@@ -1 +1 @@
g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma -Ofast -march=native

359
ccc.cpp
View File

@@ -14,56 +14,35 @@
#include <lzma.h> #include <lzma.h>
using namespace std; using namespace std;
namespace fs=filesystem; namespace fs=filesystem;
const vector<bool> CCC_C_KEYYORD_HEAD {0,0,0}; const vector<bool> CCC_DELIMITER_0_HEAD={0};
const vector<bool> CCC_SPACE {0,1,1,1,0,0,1}; const vector<bool> CCC_DELIMITER_1_HEAD={1,0};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IF {0,0,1,0,0,0}; const vector<bool> CCC_C_KEYWORD_HEAD={1,1,0,0};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IFDEF {0,0,1,0,0,1}; const vector<bool> CCC_MISCELANEOUS_HEAD={1,1,0,1};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IFNDEF {0,0,1,0,1,0}; const vector<bool> CCC_STRING_INLINE_HEAD={1,1,1,0};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELSE {0,0,1,0,1,1}; const vector<bool> CCC_REC_TABLE_REF_HEAD={1,1,1,1};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIF {0,0,1,1,0,0}; const vector<bool> CCC_STRING_INLINE_END={0,0,0,0,0,0,0,0};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIFDEF {0,0,1,1,0,1};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIFNDEF {0,0,1,1,1,0};
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ENDIF {0,0,1,1,1,1};
const vector<bool> CCC_PREPROCESSOR_OTHER_DEFINE {0,1,0,0,0,0};
const vector<bool> CCC_PREPROCESSOR_OTHER_UNDEF {0,1,0,0,0,1};
const vector<bool> CCC_PREPROCESSOR_OTHER_INCLUDE {0,1,0,0,1,0};
const vector<bool> CCC_PREPROCESSOR_OTHER_ERROR {0,1,0,0,1,1};
const vector<bool> CCC_PREPROCESSOR_OTHER_WARNING {0,1,0,1,0,0};
const vector<bool> CCC_PREPROCESSOR_OTHER_PRAGMA {0,1,0,1,0,1};
const vector<bool> CCC_PREPROCESSOR_OTHER_LINE {0,1,0,1,1,0};
const vector<bool> CCC_QUOTE {0,1,0,1,1,1};
const vector<bool> CCC_DELIMITER_HEAD {0,1,1};
const vector<bool> CCC_OTHER_GRAMMAR_HEAD {1,0,0};
const vector<bool> CCC_MISCELLANEOUS_HEAD {1,0,1};
const vector<bool> CCC_REC_TABLE_REF_HEAD {1,1,0};
const vector<bool> CCC_STRING_ASCII {1,1,1,0};
const vector<bool> CCC_STRING_UTF8 {1,1,1,1};
const vector<bool> CCC_STRING_END_ASCII {0,0,0,0,0,0,0};
const vector<bool> CCC_STRING_END_UTF8 {0,0,0,0,0,0,0,0};
#define CCC_ADD_COMPOMENT(vec,tail) \ #define CCC_ADD_COMPOMENT(vec,tail) \
do { \ do { \
auto tmp=tail; \ auto tmp=tail; \
vec.insert(vec.end(),tmp.begin(),tmp.end()); \ vec.insert(vec.end(),tmp.begin(),tmp.end()); \
} while (0) } while (0)
const vector<string> delimiter={ const vector<string> delimiter0={
"\n",
"\t",
"{", "{",
"}", "}",
"(", "(",
")", ")",
"[", "[",
"]", "]",
" ",
"{}",
"()",
"[]",
"",
";",
",", ",",
"." "."
}; };
const vector<string> other_grammer={ const vector<string> delimiter1={
"{}",
"()",
"[]",
";"
};
const vector<string> miscellaneous={
"!", "!",
"%", "%",
"'", "'",
@@ -79,9 +58,7 @@ const vector<string> other_grammer={
"^", "^",
"|", "|",
"&", "&",
"~" "~",
};
const vector<string> miscellaneous={
"+=", "+=",
"-=", "-=",
"*=", "*=",
@@ -116,6 +93,21 @@ const vector<string> miscellaneous={
"int64_t" "int64_t"
}; };
const vector<string> c_keywords={ const vector<string> c_keywords={
"#if",
"#ifdef",
"#ifndef",
"#else",
"#elif",
"#elifdef",
"#elifndef",
"#endif",
"#define",
"#undef",
"#include",
"#error",
"#warning",
"#pragma",
"#line",
"alignas", "alignas",
"alignof", "alignof",
"auto", "auto",
@@ -162,7 +154,8 @@ const vector<string> c_keywords={
"volatile", "volatile",
"while", "while",
"__asm__", "__asm__",
"__attribute__" "__attribute__",
"defined",
}; };
struct symbol { struct symbol {
string name; string name;
@@ -210,17 +203,9 @@ vector<bool> byte_to_bits(unsigned char c) {
} }
return out; return out;
} }
vector<bool> ascii_to_bits(unsigned char c) {
vector<bool> out;
for (int i=6;i>=0;i--) {
bool enabled=(c>>i)&0x01;
out.push_back(enabled);
}
return out;
}
vector<bool> generate_c_keyword(size_t index) { vector<bool> generate_c_keyword(size_t index) {
vector<bool> out; vector<bool> out;
CCC_ADD_COMPOMENT(out,CCC_C_KEYYORD_HEAD); CCC_ADD_COMPOMENT(out,CCC_C_KEYWORD_HEAD);
for (int i=5;i>=0;i--) { for (int i=5;i>=0;i--) {
bool enabled=(index>>i)&0x01; bool enabled=(index>>i)&0x01;
out.push_back(enabled); out.push_back(enabled);
@@ -241,19 +226,19 @@ vector<bool> generate_rec(size_t index,size_t total_recs) {
} }
return out; return out;
} }
vector<bool> generate_delimiter(size_t index) { vector<bool> generate_delimiter0(size_t index) {
vector<bool> out; vector<bool> out;
CCC_ADD_COMPOMENT(out,CCC_DELIMITER_HEAD); CCC_ADD_COMPOMENT(out,CCC_DELIMITER_0_HEAD);
for (int i=3;i>=0;i--) { for (int i=2;i>=0;i--) {
bool enabled=(index>>i)&0x01; bool enabled=(index>>i)&0x01;
out.push_back(enabled); out.push_back(enabled);
} }
return out; return out;
} }
vector<bool> generate_other_grammar(size_t index) { vector<bool> generate_delimiter1(size_t index) {
vector<bool> out; vector<bool> out;
CCC_ADD_COMPOMENT(out,CCC_OTHER_GRAMMAR_HEAD); CCC_ADD_COMPOMENT(out,CCC_DELIMITER_1_HEAD);
for (int i=3;i>=0;i--) { for (int i=1;i>=0;i--) {
bool enabled=(index>>i)&0x01; bool enabled=(index>>i)&0x01;
out.push_back(enabled); out.push_back(enabled);
} }
@@ -261,8 +246,8 @@ vector<bool> generate_other_grammar(size_t index) {
} }
vector<bool> generate_miscellaneous(size_t index) { vector<bool> generate_miscellaneous(size_t index) {
vector<bool> out; vector<bool> out;
CCC_ADD_COMPOMENT(out,CCC_MISCELLANEOUS_HEAD); CCC_ADD_COMPOMENT(out,CCC_MISCELANEOUS_HEAD);
for (int i=4;i>=0;i--) { for (int i=5;i>=0;i--) {
bool enabled=(index>>i)&0x01; bool enabled=(index>>i)&0x01;
out.push_back(enabled); out.push_back(enabled);
} }
@@ -270,26 +255,11 @@ vector<bool> generate_miscellaneous(size_t index) {
} }
vector<bool> generate_string_content(string str) { vector<bool> generate_string_content(string str) {
vector<bool> out; vector<bool> out;
bool is_utf8=false; CCC_ADD_COMPOMENT(out,CCC_STRING_INLINE_HEAD);
for (auto c:str) { for (auto c:str) {
if (c>127) { CCC_ADD_COMPOMENT(out,byte_to_bits(c));
is_utf8=true;
break;
}
}
if (is_utf8) {
CCC_ADD_COMPOMENT(out,CCC_STRING_UTF8);
for (auto c:str) {
CCC_ADD_COMPOMENT(out,byte_to_bits(c));
}
CCC_ADD_COMPOMENT(out,CCC_STRING_END_UTF8);
} else {
CCC_ADD_COMPOMENT(out,CCC_STRING_ASCII);
for (auto c:str) {
CCC_ADD_COMPOMENT(out,ascii_to_bits(c));
}
CCC_ADD_COMPOMENT(out,CCC_STRING_END_ASCII);
} }
CCC_ADD_COMPOMENT(out,CCC_STRING_INLINE_END);
return out; return out;
} }
void print_debug(string text) { void print_debug(string text) {
@@ -301,148 +271,143 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
vector<bool> out; vector<bool> out;
for (int i=0;i<nodes->size();i++) { for (int i=0;i<nodes->size();i++) {
string type=string(ts_node_type(nodes->at(i))); string type=string(ts_node_type(nodes->at(i)));
string supertext=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i))); string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
if (type=="#if") { if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IF);
print_debug("if");
} else if (type=="#ifdef") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IFDEF);
print_debug("ifdef");
} else if (type=="#ifndef") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IFNDEF);
print_debug("ifndef");
} else if (type=="#else") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELSE);
print_debug("else");
} else if (type=="#elif") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIF);
print_debug("elif");
} else if (type=="#elifdef") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIFDEF);
print_debug("elifdef");
} else if (type=="#elifndef") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIFNDEF);
print_debug("elifndef");
} else if (type=="#endif") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ENDIF);
print_debug("endif");
} else if (type=="#define") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_DEFINE);
print_debug("define");
} else if (type=="#undef") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_UNDEF);
print_debug("undef");
} else if (type=="#include") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_INCLUDE);
print_debug("include");
} else if (type=="#error") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_ERROR);
print_debug("error");
} else if (type=="#warning") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_WARNING);
print_debug("warning");
} else if (type=="#pragma" || (type=="preproc_directive" && supertext=="#pragma")) {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_PRAGMA);
print_debug("pragma");
} else if (type=="#line") {
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_LINE);
print_debug("line");
} else if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="type_identifier" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
auto it=find(rec_list.begin(),rec_list.end(),text); auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) { if (it==rec_list.end()) {
if (!text.empty()) { string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i))); CCC_ADD_COMPOMENT(out,generate_string_content(text));
CCC_ADD_COMPOMENT(out,generate_string_content(text)); print_debug("string ("+type+"): "+text);
print_debug("string ("+type+"): "+text);
} else {
auto it=find(delimiter.begin(),delimiter.end(),"");
size_t index=distance(delimiter.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter(index));
print_debug("delimiter for empty string");
}
} else { } else {
size_t index=distance(rec_list.begin(),it); size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size())); CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string ("+type+"): "+text); print_debug("rec_table for string ("+type+"): "+text);
} }
} else if (type=="primitive_type") { } else if (type=="primitive_type" || type=="type_identifier") {
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i))); string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
auto it=find(c_keywords.begin(),c_keywords.end(),text); auto it=find(c_keywords.begin(),c_keywords.end(),text);
if (it!=c_keywords.end()) { if (it!=c_keywords.end()) {
size_t index=distance(c_keywords.begin(),it); size_t index=distance(c_keywords.begin(),it);
CCC_ADD_COMPOMENT(out,generate_c_keyword(index)); CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
print_debug("primitive_type: "+text); print_debug("type found in c keyword: "+text);
} else { } else {
auto it=find(rec_list.begin(),rec_list.end(),text); auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) { if (it==rec_list.end()) {
if (!text.empty()) { if (!text.empty()) {
CCC_ADD_COMPOMENT(out,generate_string_content(text)); CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string ("+type+"): "+text); print_debug("string for type ("+type+"): "+text);
} else { } else {
cout<<"Error: provided primitive is empty: "<<text<<endl;; cout<<"Warning: provided primitive is empty: "<<text<<endl;
exit(-1);
} }
} else { } else {
size_t index=distance(rec_list.begin(),it); size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size())); CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string ("+type+"): "+text); print_debug("rec_table for string for type ("+type+"): "+text);
} }
} }
} else if (find(delimiter.begin(),delimiter.end(),type)!=delimiter.end()) { } else if (find(delimiter0.begin(),delimiter0.end(),type)!=delimiter0.end() || find(delimiter1.begin(),delimiter1.end(),type)!=delimiter1.end() || type=="\"") {
string text; string insert;
if (type=="(" && i+1<nodes->size()) { if (type=="(" && i+1<nodes->size()) {
if (string(ts_node_type(nodes->at(i+1)))==")") { if (string(ts_node_type(nodes->at(i+1)))==")") {
text="()"; insert="()";
i++; i++;
} else { } else {
text="("; insert="(";
} }
} else if (type=="[" && i+1<nodes->size()) { } else if (type=="[" && i+1<nodes->size()) {
if (string(ts_node_type(nodes->at(i+1)))=="]") { if (string(ts_node_type(nodes->at(i+1)))=="]") {
text="[]"; insert="[]";
i++; i++;
} else { } else {
text="["; insert="[";
} }
} else if (type=="{" && i+1<nodes->size()) { } else if (type=="{" && i+1<nodes->size()) {
if (string(ts_node_type(nodes->at(i+1)))=="}") { if (string(ts_node_type(nodes->at(i+1)))=="}") {
text="{}"; insert="{}";
i++; i++;
} else { } else {
text="{"; insert="{";
} }
} else { } else {
text=type; insert=type;
} }
auto it=find(delimiter.begin(),delimiter.end(),text); auto it=find(delimiter0.begin(),delimiter0.end(),insert);
if (it!=delimiter.end()) { if (it!=delimiter0.end()) {
size_t index=distance(delimiter.begin(),it); size_t index=distance(delimiter0.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter(index)); CCC_ADD_COMPOMENT(out,generate_delimiter0(index));
print_debug("delimiter: "+text); print_debug("delimiter 0: "+insert);
} else { } else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<text<<endl;; if (insert!="{}" && insert!="\"") {
exit(-1); auto it=find(delimiter1.begin(),delimiter1.end(),insert);
if (it!=delimiter1.end()) {
size_t index=distance(delimiter1.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
print_debug("delimiter 1: "+insert);
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
// exit(-1);
}
} else {
if (insert=="{}") {
auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
if (it!=delimiter1.end()) {
size_t index=distance(delimiter1.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
CCC_ADD_COMPOMENT(out,{0});
print_debug("delimiter 1: "+insert);
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
// exit(-1);
}
} else if (insert=="\"") {
auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
if (it!=delimiter1.end()) {
size_t index=distance(delimiter1.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
CCC_ADD_COMPOMENT(out,{1});
print_debug("delimiter 1: "+insert);
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
exit(-1);
}
} else {
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
// exit(-1);
}
}
} }
} else if (find(other_grammer.begin(),other_grammer.end(),type)!=other_grammer.end()) { } else if (find(c_keywords.begin(),c_keywords.end(),type)!=c_keywords.end() || type=="preproc_directive") {
auto it=find(other_grammer.begin(),other_grammer.end(),type); if (type!="preproc_directive") {
if (it!=other_grammer.end()) { auto it=find(c_keywords.begin(),c_keywords.end(),type);
size_t index=distance(other_grammer.begin(),it); if (it!=c_keywords.end()) {
CCC_ADD_COMPOMENT(out,generate_other_grammar(index)); size_t index=distance(c_keywords.begin(),it);
print_debug("other grammar: "+type); CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
print_debug("c keyword: "+type);
} else {
cout<<"Error: unknow C keyword, that shouldn't happen: "<<type<<" "<<text<<endl;;
// exit(-1);
}
} else { } else {
cout<<"Error: unknow other grammar symbol, that shouldn't happen: "<<type<<endl;; auto it=find(c_keywords.begin(),c_keywords.end(),text);
exit(-1); if (it!=c_keywords.end()) {
} size_t index=distance(c_keywords.begin(),it);
} else if (find(c_keywords.begin(),c_keywords.end(),type)!=c_keywords.end()) { CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
auto it=find(c_keywords.begin(),c_keywords.end(),type); print_debug("c keyword: "+type);
if (it!=c_keywords.end()) { } else {
size_t index=distance(c_keywords.begin(),it); auto it=find(rec_list.begin(),rec_list.end(),text);
CCC_ADD_COMPOMENT(out,generate_c_keyword(index)); if (it==rec_list.end()) {
print_debug("c keyword: "+type); if (!text.empty()) {
} else { CCC_ADD_COMPOMENT(out,generate_string_content(text));
cout<<"Error: unknow C keyword, that shouldn't happen: "<<type<<endl;; print_debug("string for c keyword ("+type+"): "+text);
exit(-1); } else {
cout<<"Warning: C keyword is empty: "<<text<<endl;
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string for c keyword ("+type+"): "+text);
}
}
} }
} else if (find(miscellaneous.begin(),miscellaneous.end(),type)!=miscellaneous.end()) { } else if (find(miscellaneous.begin(),miscellaneous.end(),type)!=miscellaneous.end()) {
auto it=find(miscellaneous.begin(),miscellaneous.end(),type); auto it=find(miscellaneous.begin(),miscellaneous.end(),type);
@@ -452,37 +417,42 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
print_debug("miscellaneous: "+type); print_debug("miscellaneous: "+type);
} else { } else {
cout<<"Error: unknow miscellaneous, that shouldn't happen: "<<type<<endl;; cout<<"Error: unknow miscellaneous, that shouldn't happen: "<<type<<endl;;
exit(-1); // exit(-1);
} }
} else if (type=="comment") { } else if (type=="comment") {
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
auto it=find(rec_list.begin(),rec_list.end(),text); auto it=find(rec_list.begin(),rec_list.end(),text);
if (it==rec_list.end()) { if (it==rec_list.end()) {
cout<<"Error: comment in reccurences map not found: "<<text<<endl;; if (it==rec_list.end()) {
exit(-1); if (!text.empty()) {
CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string for comment("+type+"): "+text);
} else {
cout<<"Warning: unknow node is empty: "<<text<<endl;
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string for comment ("+type+"): "+text);
}
} else { } else {
size_t index=distance(rec_list.begin(),it); size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size())); CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for comment"); print_debug("rec_table for comment");
} }
} else if (type=="\"") {
if (i+1<nodes->size()) {
if (string(ts_node_type(nodes->at(i+1)))=="\"") {
auto it=find(delimiter.begin(),delimiter.end(),"");
size_t index=distance(delimiter.begin(),it);
CCC_ADD_COMPOMENT(out,generate_delimiter(index));
print_debug("double quotes mark, inserting delimiter for empty string");
i++;
} else {
CCC_ADD_COMPOMENT(out,CCC_QUOTE);
print_debug("single quote mark");
}
}
} else { } else {
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i))); auto it=find(rec_list.begin(),rec_list.end(),text);
cout<<"Error: unknow node type: "<<type<<endl; if (it==rec_list.end()) {
cout<<"Error: unknow node text: "<<text<<endl; if (!text.empty()) {
exit(-1); CCC_ADD_COMPOMENT(out,generate_string_content(text));
print_debug("string for unknow node ("+type+"): "+text);
} else {
cout<<"Warning: unknow node is empty: "<<text<<endl;
}
} else {
size_t index=distance(rec_list.begin(),it);
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
print_debug("rec_table for string for unknow node ("+type+"): "+text);
}
} }
} }
vector<unsigned char> payload_bytes; vector<unsigned char> payload_bytes;
@@ -517,6 +487,7 @@ void construct_rec_table(vector<string> &files_content,vector<string> files_name
} }
} }
int main(int argc,char **argv) { int main(int argc,char **argv) {
cout<<c_keywords.size()<<endl;
if (argc<2) { if (argc<2) {
cout<<"Usage: ccc [FILES]"<<endl; cout<<"Usage: ccc [FILES]"<<endl;
return -1; return -1;

View File

@@ -2,13 +2,8 @@
#include <stdint.h> #include <stdint.h>
typedef static unsigned char HEY; typedef static unsigned char HEY;
// hello // hello
// hello
// hello
// hello
// hello
// hello
int main() { int main() {
hello[]="hello"; char hello[]="hello";
HEY res=8; HEY res=8;
if (res!=9) { if (res!=9) {
printf(hello); printf(hello);

BIN
linux_sources.tar.xz Normal file

Binary file not shown.

BIN
test.ccc

Binary file not shown.

69
test.py Normal file
View File

@@ -0,0 +1,69 @@
import os
import subprocess
import time
def get_source_files(root_dir, max_files=10000):
    """Collect C source/header files under ``root_dir`` and sum their sizes.

    Args:
        root_dir: Directory to walk recursively with ``os.walk``.
        max_files: Upper bound on the number of files collected. Defaults to
            10000, the cap the script previously hard-coded.

    Returns:
        A ``(source_files, total_size)`` tuple: the list of paths ending in
        ``.c`` or ``.h`` (in ``os.walk`` order, at most ``max_files`` entries)
        and the sum of their sizes in bytes as reported by ``os.path.getsize``.
    """
    source_files = []
    total_size = 0
    for root, _, files in os.walk(root_dir):
        for file in files:
            # Stop walking as soon as the cap is reached: nothing found later
            # could be added, so traversing the rest of the tree is wasted work.
            if len(source_files) >= max_files:
                return source_files, total_size
            if file.endswith(('.c', '.h')):
                path = os.path.join(root, file)
                source_files.append(path)
                total_size += os.path.getsize(path)
    return source_files, total_size
def main():
    """Compare ``tar -cJf`` (XZ) against the ``./ccc`` compressor on ``linux/``.

    Collects the C sources and headers under the ``linux`` directory, archives
    them with tar/xz, runs ``./ccc`` on the same file list, then prints the
    size of each output file relative to the raw input and the time each tool
    took. Returns ``None`` in every case; problems are reported on stdout.
    """
    target_dir = "linux"
    if not os.path.exists(target_dir):
        print(f"Erreur: Le dossier {target_dir} n'existe pas.")
        return
    print(f"--- Analyse de {target_dir} ---")
    files, total_raw_size = get_source_files(target_dir)
    raw_mo = total_raw_size / (1024 * 1024)
    print(f"Fichiers trouvés : {len(files)}")
    print(f"Taille totale brute : {raw_mo:.2f} Mo")
    if not files:
        # Nothing to compress: tar would be handed an empty file list and ccc
        # no arguments, and the ratio below would divide by zero against
        # output files left over from a previous run.
        print("Erreur: aucun fichier source (.c/.h) trouvé.")
        return
    # 1. Compression with TAR
    print("\n--- Lancement de TAR -cJf (XZ) ---")
    start_tar = time.time()
    # The file list is fed on stdin (--files-from=-) rather than on the
    # command line.
    tar_cmd = ["tar", "-cJf", "linux_sources.tar.xz", "--files-from=-"]
    process_tar = subprocess.Popen(tar_cmd, stdin=subprocess.PIPE)
    process_tar.communicate(input="\n".join(files).encode())
    end_tar = time.time()
    # 2. Compression with CCC
    print("\n--- Lancement de CCC (Output temps réel) ---")
    print("-" * 40)
    start_ccc = time.time()
    try:
        # stdout and stderr are left at their defaults so CCC's output is
        # shown as it runs.
        subprocess.run(["./ccc"] + files, check=True)
    except subprocess.CalledProcessError as e:
        # A non-zero exit is reported but the summary is still printed.
        print(f"\nErreur fatale CCC : {e}")
    except OSError as e:
        # The process could not be started at all (e.g. argument list too
        # long or missing binary); there is nothing to summarize.
        print(f"\nErreur système (trop de fichiers ?) : {e}")
        return
    end_ccc = time.time()
    print("-" * 40)
    # 3. Final figures
    print("\n" + "="*40)
    print(f" RÉSULTATS (Source: {raw_mo:.2f} Mo)")
    print("="*40)
    for name, filename in [("TAR.XZ", "linux_sources.tar.xz"), ("CCC", "test.ccc")]:
        if os.path.exists(filename):
            size_mo = os.path.getsize(filename) / (1024 * 1024)
            if total_raw_size > 0:
                ratio = (size_mo / raw_mo) * 100
                print(f"{name:10} : {size_mo:8.2f} Mo ({ratio:5.2f}% du total)")
            else:
                # All collected files are empty: a percentage of zero bytes
                # is undefined, so only the absolute size is shown.
                print(f"{name:10} : {size_mo:8.2f} Mo")
        else:
            print(f"{name:10} : Non généré")
    print(f"\nTemps TAR : {end_tar - start_tar:.2f}s")
    print(f"Temps CCC : {end_ccc - start_ccc:.2f}s")
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()