improvements incoming
This commit is contained in:
2
build.sh
2
build.sh
@@ -1 +1 @@
|
|||||||
g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma
|
g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma -Ofast -march=native
|
||||||
|
|||||||
359
ccc.cpp
359
ccc.cpp
@@ -14,56 +14,35 @@
|
|||||||
#include <lzma.h>
|
#include <lzma.h>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
namespace fs=filesystem;
|
namespace fs=filesystem;
|
||||||
const vector<bool> CCC_C_KEYYORD_HEAD {0,0,0};
|
const vector<bool> CCC_DELIMITER_0_HEAD={0};
|
||||||
const vector<bool> CCC_SPACE {0,1,1,1,0,0,1};
|
const vector<bool> CCC_DELIMITER_1_HEAD={1,0};
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IF {0,0,1,0,0,0};
|
const vector<bool> CCC_C_KEYWORD_HEAD={1,1,0,0};
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IFDEF {0,0,1,0,0,1};
|
const vector<bool> CCC_MISCELANEOUS_HEAD={1,1,0,1};
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IFNDEF {0,0,1,0,1,0};
|
const vector<bool> CCC_STRING_INLINE_HEAD={1,1,1,0};
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELSE {0,0,1,0,1,1};
|
const vector<bool> CCC_REC_TABLE_REF_HEAD={1,1,1,1};
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIF {0,0,1,1,0,0};
|
const vector<bool> CCC_STRING_INLINE_END={0,0,0,0,0,0,0,0};
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIFDEF {0,0,1,1,0,1};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIFNDEF {0,0,1,1,1,0};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ENDIF {0,0,1,1,1,1};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_OTHER_DEFINE {0,1,0,0,0,0};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_OTHER_UNDEF {0,1,0,0,0,1};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_OTHER_INCLUDE {0,1,0,0,1,0};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_OTHER_ERROR {0,1,0,0,1,1};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_OTHER_WARNING {0,1,0,1,0,0};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_OTHER_PRAGMA {0,1,0,1,0,1};
|
|
||||||
const vector<bool> CCC_PREPROCESSOR_OTHER_LINE {0,1,0,1,1,0};
|
|
||||||
const vector<bool> CCC_QUOTE {0,1,0,1,1,1};
|
|
||||||
const vector<bool> CCC_DELIMITER_HEAD {0,1,1};
|
|
||||||
const vector<bool> CCC_OTHER_GRAMMAR_HEAD {1,0,0};
|
|
||||||
const vector<bool> CCC_MISCELLANEOUS_HEAD {1,0,1};
|
|
||||||
const vector<bool> CCC_REC_TABLE_REF_HEAD {1,1,0};
|
|
||||||
const vector<bool> CCC_STRING_ASCII {1,1,1,0};
|
|
||||||
const vector<bool> CCC_STRING_UTF8 {1,1,1,1};
|
|
||||||
const vector<bool> CCC_STRING_END_ASCII {0,0,0,0,0,0,0};
|
|
||||||
const vector<bool> CCC_STRING_END_UTF8 {0,0,0,0,0,0,0,0};
|
|
||||||
#define CCC_ADD_COMPOMENT(vec,tail) \
|
#define CCC_ADD_COMPOMENT(vec,tail) \
|
||||||
do { \
|
do { \
|
||||||
auto tmp=tail; \
|
auto tmp=tail; \
|
||||||
vec.insert(vec.end(),tmp.begin(),tmp.end()); \
|
vec.insert(vec.end(),tmp.begin(),tmp.end()); \
|
||||||
} while (0)
|
} while (0)
|
||||||
const vector<string> delimiter={
|
const vector<string> delimiter0={
|
||||||
"\n",
|
|
||||||
"\t",
|
|
||||||
"{",
|
"{",
|
||||||
"}",
|
"}",
|
||||||
"(",
|
"(",
|
||||||
")",
|
")",
|
||||||
"[",
|
"[",
|
||||||
"]",
|
"]",
|
||||||
" ",
|
|
||||||
"{}",
|
|
||||||
"()",
|
|
||||||
"[]",
|
|
||||||
"",
|
|
||||||
";",
|
|
||||||
",",
|
",",
|
||||||
"."
|
"."
|
||||||
};
|
};
|
||||||
const vector<string> other_grammer={
|
const vector<string> delimiter1={
|
||||||
|
"{}",
|
||||||
|
"()",
|
||||||
|
"[]",
|
||||||
|
";"
|
||||||
|
};
|
||||||
|
const vector<string> miscellaneous={
|
||||||
"!",
|
"!",
|
||||||
"%",
|
"%",
|
||||||
"'",
|
"'",
|
||||||
@@ -79,9 +58,7 @@ const vector<string> other_grammer={
|
|||||||
"^",
|
"^",
|
||||||
"|",
|
"|",
|
||||||
"&",
|
"&",
|
||||||
"~"
|
"~",
|
||||||
};
|
|
||||||
const vector<string> miscellaneous={
|
|
||||||
"+=",
|
"+=",
|
||||||
"-=",
|
"-=",
|
||||||
"*=",
|
"*=",
|
||||||
@@ -116,6 +93,21 @@ const vector<string> miscellaneous={
|
|||||||
"int64_t"
|
"int64_t"
|
||||||
};
|
};
|
||||||
const vector<string> c_keywords={
|
const vector<string> c_keywords={
|
||||||
|
"#if",
|
||||||
|
"#ifdef",
|
||||||
|
"#ifndef",
|
||||||
|
"#else",
|
||||||
|
"#elif",
|
||||||
|
"#elifdef",
|
||||||
|
"#elifndef",
|
||||||
|
"#endif",
|
||||||
|
"#define",
|
||||||
|
"#undef",
|
||||||
|
"#include",
|
||||||
|
"#error",
|
||||||
|
"#warning",
|
||||||
|
"#pragma",
|
||||||
|
"#line",
|
||||||
"alignas",
|
"alignas",
|
||||||
"alignof",
|
"alignof",
|
||||||
"auto",
|
"auto",
|
||||||
@@ -162,7 +154,8 @@ const vector<string> c_keywords={
|
|||||||
"volatile",
|
"volatile",
|
||||||
"while",
|
"while",
|
||||||
"__asm__",
|
"__asm__",
|
||||||
"__attribute__"
|
"__attribute__",
|
||||||
|
"defined",
|
||||||
};
|
};
|
||||||
struct symbol {
|
struct symbol {
|
||||||
string name;
|
string name;
|
||||||
@@ -210,17 +203,9 @@ vector<bool> byte_to_bits(unsigned char c) {
|
|||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
vector<bool> ascii_to_bits(unsigned char c) {
|
|
||||||
vector<bool> out;
|
|
||||||
for (int i=6;i>=0;i--) {
|
|
||||||
bool enabled=(c>>i)&0x01;
|
|
||||||
out.push_back(enabled);
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
vector<bool> generate_c_keyword(size_t index) {
|
vector<bool> generate_c_keyword(size_t index) {
|
||||||
vector<bool> out;
|
vector<bool> out;
|
||||||
CCC_ADD_COMPOMENT(out,CCC_C_KEYYORD_HEAD);
|
CCC_ADD_COMPOMENT(out,CCC_C_KEYWORD_HEAD);
|
||||||
for (int i=5;i>=0;i--) {
|
for (int i=5;i>=0;i--) {
|
||||||
bool enabled=(index>>i)&0x01;
|
bool enabled=(index>>i)&0x01;
|
||||||
out.push_back(enabled);
|
out.push_back(enabled);
|
||||||
@@ -241,19 +226,19 @@ vector<bool> generate_rec(size_t index,size_t total_recs) {
|
|||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
vector<bool> generate_delimiter(size_t index) {
|
vector<bool> generate_delimiter0(size_t index) {
|
||||||
vector<bool> out;
|
vector<bool> out;
|
||||||
CCC_ADD_COMPOMENT(out,CCC_DELIMITER_HEAD);
|
CCC_ADD_COMPOMENT(out,CCC_DELIMITER_0_HEAD);
|
||||||
for (int i=3;i>=0;i--) {
|
for (int i=2;i>=0;i--) {
|
||||||
bool enabled=(index>>i)&0x01;
|
bool enabled=(index>>i)&0x01;
|
||||||
out.push_back(enabled);
|
out.push_back(enabled);
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
vector<bool> generate_other_grammar(size_t index) {
|
vector<bool> generate_delimiter1(size_t index) {
|
||||||
vector<bool> out;
|
vector<bool> out;
|
||||||
CCC_ADD_COMPOMENT(out,CCC_OTHER_GRAMMAR_HEAD);
|
CCC_ADD_COMPOMENT(out,CCC_DELIMITER_1_HEAD);
|
||||||
for (int i=3;i>=0;i--) {
|
for (int i=1;i>=0;i--) {
|
||||||
bool enabled=(index>>i)&0x01;
|
bool enabled=(index>>i)&0x01;
|
||||||
out.push_back(enabled);
|
out.push_back(enabled);
|
||||||
}
|
}
|
||||||
@@ -261,8 +246,8 @@ vector<bool> generate_other_grammar(size_t index) {
|
|||||||
}
|
}
|
||||||
vector<bool> generate_miscellaneous(size_t index) {
|
vector<bool> generate_miscellaneous(size_t index) {
|
||||||
vector<bool> out;
|
vector<bool> out;
|
||||||
CCC_ADD_COMPOMENT(out,CCC_MISCELLANEOUS_HEAD);
|
CCC_ADD_COMPOMENT(out,CCC_MISCELANEOUS_HEAD);
|
||||||
for (int i=4;i>=0;i--) {
|
for (int i=5;i>=0;i--) {
|
||||||
bool enabled=(index>>i)&0x01;
|
bool enabled=(index>>i)&0x01;
|
||||||
out.push_back(enabled);
|
out.push_back(enabled);
|
||||||
}
|
}
|
||||||
@@ -270,26 +255,11 @@ vector<bool> generate_miscellaneous(size_t index) {
|
|||||||
}
|
}
|
||||||
vector<bool> generate_string_content(string str) {
|
vector<bool> generate_string_content(string str) {
|
||||||
vector<bool> out;
|
vector<bool> out;
|
||||||
bool is_utf8=false;
|
CCC_ADD_COMPOMENT(out,CCC_STRING_INLINE_HEAD);
|
||||||
for (auto c:str) {
|
for (auto c:str) {
|
||||||
if (c>127) {
|
CCC_ADD_COMPOMENT(out,byte_to_bits(c));
|
||||||
is_utf8=true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (is_utf8) {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_STRING_UTF8);
|
|
||||||
for (auto c:str) {
|
|
||||||
CCC_ADD_COMPOMENT(out,byte_to_bits(c));
|
|
||||||
}
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_STRING_END_UTF8);
|
|
||||||
} else {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_STRING_ASCII);
|
|
||||||
for (auto c:str) {
|
|
||||||
CCC_ADD_COMPOMENT(out,ascii_to_bits(c));
|
|
||||||
}
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_STRING_END_ASCII);
|
|
||||||
}
|
}
|
||||||
|
CCC_ADD_COMPOMENT(out,CCC_STRING_INLINE_END);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
void print_debug(string text) {
|
void print_debug(string text) {
|
||||||
@@ -301,148 +271,143 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
|
|||||||
vector<bool> out;
|
vector<bool> out;
|
||||||
for (int i=0;i<nodes->size();i++) {
|
for (int i=0;i<nodes->size();i++) {
|
||||||
string type=string(ts_node_type(nodes->at(i)));
|
string type=string(ts_node_type(nodes->at(i)));
|
||||||
string supertext=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
||||||
if (type=="#if") {
|
if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IF);
|
|
||||||
print_debug("if");
|
|
||||||
} else if (type=="#ifdef") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IFDEF);
|
|
||||||
print_debug("ifdef");
|
|
||||||
} else if (type=="#ifndef") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IFNDEF);
|
|
||||||
print_debug("ifndef");
|
|
||||||
} else if (type=="#else") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELSE);
|
|
||||||
print_debug("else");
|
|
||||||
} else if (type=="#elif") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIF);
|
|
||||||
print_debug("elif");
|
|
||||||
} else if (type=="#elifdef") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIFDEF);
|
|
||||||
print_debug("elifdef");
|
|
||||||
} else if (type=="#elifndef") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIFNDEF);
|
|
||||||
print_debug("elifndef");
|
|
||||||
} else if (type=="#endif") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ENDIF);
|
|
||||||
print_debug("endif");
|
|
||||||
} else if (type=="#define") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_DEFINE);
|
|
||||||
print_debug("define");
|
|
||||||
} else if (type=="#undef") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_UNDEF);
|
|
||||||
print_debug("undef");
|
|
||||||
} else if (type=="#include") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_INCLUDE);
|
|
||||||
print_debug("include");
|
|
||||||
} else if (type=="#error") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_ERROR);
|
|
||||||
print_debug("error");
|
|
||||||
} else if (type=="#warning") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_WARNING);
|
|
||||||
print_debug("warning");
|
|
||||||
} else if (type=="#pragma" || (type=="preproc_directive" && supertext=="#pragma")) {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_PRAGMA);
|
|
||||||
print_debug("pragma");
|
|
||||||
} else if (type=="#line") {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_LINE);
|
|
||||||
print_debug("line");
|
|
||||||
} else if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="type_identifier" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
|
|
||||||
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
|
||||||
auto it=find(rec_list.begin(),rec_list.end(),text);
|
auto it=find(rec_list.begin(),rec_list.end(),text);
|
||||||
if (it==rec_list.end()) {
|
if (it==rec_list.end()) {
|
||||||
if (!text.empty()) {
|
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
||||||
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
CCC_ADD_COMPOMENT(out,generate_string_content(text));
|
||||||
CCC_ADD_COMPOMENT(out,generate_string_content(text));
|
print_debug("string ("+type+"): "+text);
|
||||||
print_debug("string ("+type+"): "+text);
|
|
||||||
} else {
|
|
||||||
auto it=find(delimiter.begin(),delimiter.end(),"");
|
|
||||||
size_t index=distance(delimiter.begin(),it);
|
|
||||||
CCC_ADD_COMPOMENT(out,generate_delimiter(index));
|
|
||||||
print_debug("delimiter for empty string");
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
size_t index=distance(rec_list.begin(),it);
|
size_t index=distance(rec_list.begin(),it);
|
||||||
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
||||||
print_debug("rec_table for string ("+type+"): "+text);
|
print_debug("rec_table for string ("+type+"): "+text);
|
||||||
}
|
}
|
||||||
} else if (type=="primitive_type") {
|
} else if (type=="primitive_type" || type=="type_identifier") {
|
||||||
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
||||||
auto it=find(c_keywords.begin(),c_keywords.end(),text);
|
auto it=find(c_keywords.begin(),c_keywords.end(),text);
|
||||||
if (it!=c_keywords.end()) {
|
if (it!=c_keywords.end()) {
|
||||||
size_t index=distance(c_keywords.begin(),it);
|
size_t index=distance(c_keywords.begin(),it);
|
||||||
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
|
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
|
||||||
print_debug("primitive_type: "+text);
|
print_debug("type found in c keyword: "+text);
|
||||||
} else {
|
} else {
|
||||||
auto it=find(rec_list.begin(),rec_list.end(),text);
|
auto it=find(rec_list.begin(),rec_list.end(),text);
|
||||||
if (it==rec_list.end()) {
|
if (it==rec_list.end()) {
|
||||||
if (!text.empty()) {
|
if (!text.empty()) {
|
||||||
CCC_ADD_COMPOMENT(out,generate_string_content(text));
|
CCC_ADD_COMPOMENT(out,generate_string_content(text));
|
||||||
print_debug("string ("+type+"): "+text);
|
print_debug("string for type ("+type+"): "+text);
|
||||||
} else {
|
} else {
|
||||||
cout<<"Error: provided primitive is empty: "<<text<<endl;;
|
cout<<"Warning: provided primitive is empty: "<<text<<endl;
|
||||||
exit(-1);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
size_t index=distance(rec_list.begin(),it);
|
size_t index=distance(rec_list.begin(),it);
|
||||||
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
||||||
print_debug("rec_table for string ("+type+"): "+text);
|
print_debug("rec_table for string for type ("+type+"): "+text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (find(delimiter.begin(),delimiter.end(),type)!=delimiter.end()) {
|
} else if (find(delimiter0.begin(),delimiter0.end(),type)!=delimiter0.end() || find(delimiter1.begin(),delimiter1.end(),type)!=delimiter1.end() || type=="\"") {
|
||||||
string text;
|
string insert;
|
||||||
if (type=="(" && i+1<nodes->size()) {
|
if (type=="(" && i+1<nodes->size()) {
|
||||||
if (string(ts_node_type(nodes->at(i+1)))==")") {
|
if (string(ts_node_type(nodes->at(i+1)))==")") {
|
||||||
text="()";
|
insert="()";
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else {
|
||||||
text="(";
|
insert="(";
|
||||||
}
|
}
|
||||||
} else if (type=="[" && i+1<nodes->size()) {
|
} else if (type=="[" && i+1<nodes->size()) {
|
||||||
if (string(ts_node_type(nodes->at(i+1)))=="]") {
|
if (string(ts_node_type(nodes->at(i+1)))=="]") {
|
||||||
text="[]";
|
insert="[]";
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else {
|
||||||
text="[";
|
insert="[";
|
||||||
}
|
}
|
||||||
} else if (type=="{" && i+1<nodes->size()) {
|
} else if (type=="{" && i+1<nodes->size()) {
|
||||||
if (string(ts_node_type(nodes->at(i+1)))=="}") {
|
if (string(ts_node_type(nodes->at(i+1)))=="}") {
|
||||||
text="{}";
|
insert="{}";
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else {
|
||||||
text="{";
|
insert="{";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
text=type;
|
insert=type;
|
||||||
}
|
}
|
||||||
auto it=find(delimiter.begin(),delimiter.end(),text);
|
auto it=find(delimiter0.begin(),delimiter0.end(),insert);
|
||||||
if (it!=delimiter.end()) {
|
if (it!=delimiter0.end()) {
|
||||||
size_t index=distance(delimiter.begin(),it);
|
size_t index=distance(delimiter0.begin(),it);
|
||||||
CCC_ADD_COMPOMENT(out,generate_delimiter(index));
|
CCC_ADD_COMPOMENT(out,generate_delimiter0(index));
|
||||||
print_debug("delimiter: "+text);
|
print_debug("delimiter 0: "+insert);
|
||||||
} else {
|
} else {
|
||||||
cout<<"Error: unknow delimiter, that shouldn't happen: "<<text<<endl;;
|
if (insert!="{}" && insert!="\"") {
|
||||||
exit(-1);
|
auto it=find(delimiter1.begin(),delimiter1.end(),insert);
|
||||||
|
if (it!=delimiter1.end()) {
|
||||||
|
size_t index=distance(delimiter1.begin(),it);
|
||||||
|
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
|
||||||
|
print_debug("delimiter 1: "+insert);
|
||||||
|
} else {
|
||||||
|
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
|
||||||
|
// exit(-1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (insert=="{}") {
|
||||||
|
auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
|
||||||
|
if (it!=delimiter1.end()) {
|
||||||
|
size_t index=distance(delimiter1.begin(),it);
|
||||||
|
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
|
||||||
|
CCC_ADD_COMPOMENT(out,{0});
|
||||||
|
print_debug("delimiter 1: "+insert);
|
||||||
|
} else {
|
||||||
|
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
|
||||||
|
// exit(-1);
|
||||||
|
}
|
||||||
|
} else if (insert=="\"") {
|
||||||
|
auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
|
||||||
|
if (it!=delimiter1.end()) {
|
||||||
|
size_t index=distance(delimiter1.begin(),it);
|
||||||
|
CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
|
||||||
|
CCC_ADD_COMPOMENT(out,{1});
|
||||||
|
print_debug("delimiter 1: "+insert);
|
||||||
|
} else {
|
||||||
|
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
|
||||||
|
// exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (find(other_grammer.begin(),other_grammer.end(),type)!=other_grammer.end()) {
|
} else if (find(c_keywords.begin(),c_keywords.end(),type)!=c_keywords.end() || type=="preproc_directive") {
|
||||||
auto it=find(other_grammer.begin(),other_grammer.end(),type);
|
if (type!="preproc_directive") {
|
||||||
if (it!=other_grammer.end()) {
|
auto it=find(c_keywords.begin(),c_keywords.end(),type);
|
||||||
size_t index=distance(other_grammer.begin(),it);
|
if (it!=c_keywords.end()) {
|
||||||
CCC_ADD_COMPOMENT(out,generate_other_grammar(index));
|
size_t index=distance(c_keywords.begin(),it);
|
||||||
print_debug("other grammar: "+type);
|
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
|
||||||
|
print_debug("c keyword: "+type);
|
||||||
|
} else {
|
||||||
|
cout<<"Error: unknow C keyword, that shouldn't happen: "<<type<<" "<<text<<endl;;
|
||||||
|
// exit(-1);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
cout<<"Error: unknow other grammar symbol, that shouldn't happen: "<<type<<endl;;
|
auto it=find(c_keywords.begin(),c_keywords.end(),text);
|
||||||
exit(-1);
|
if (it!=c_keywords.end()) {
|
||||||
}
|
size_t index=distance(c_keywords.begin(),it);
|
||||||
} else if (find(c_keywords.begin(),c_keywords.end(),type)!=c_keywords.end()) {
|
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
|
||||||
auto it=find(c_keywords.begin(),c_keywords.end(),type);
|
print_debug("c keyword: "+type);
|
||||||
if (it!=c_keywords.end()) {
|
} else {
|
||||||
size_t index=distance(c_keywords.begin(),it);
|
auto it=find(rec_list.begin(),rec_list.end(),text);
|
||||||
CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
|
if (it==rec_list.end()) {
|
||||||
print_debug("c keyword: "+type);
|
if (!text.empty()) {
|
||||||
} else {
|
CCC_ADD_COMPOMENT(out,generate_string_content(text));
|
||||||
cout<<"Error: unknow C keyword, that shouldn't happen: "<<type<<endl;;
|
print_debug("string for c keyword ("+type+"): "+text);
|
||||||
exit(-1);
|
} else {
|
||||||
|
cout<<"Warning: C keyword is empty: "<<text<<endl;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
size_t index=distance(rec_list.begin(),it);
|
||||||
|
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
||||||
|
print_debug("rec_table for string for c keyword ("+type+"): "+text);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (find(miscellaneous.begin(),miscellaneous.end(),type)!=miscellaneous.end()) {
|
} else if (find(miscellaneous.begin(),miscellaneous.end(),type)!=miscellaneous.end()) {
|
||||||
auto it=find(miscellaneous.begin(),miscellaneous.end(),type);
|
auto it=find(miscellaneous.begin(),miscellaneous.end(),type);
|
||||||
@@ -452,37 +417,42 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
|
|||||||
print_debug("miscellaneous: "+type);
|
print_debug("miscellaneous: "+type);
|
||||||
} else {
|
} else {
|
||||||
cout<<"Error: unknow miscellaneous, that shouldn't happen: "<<type<<endl;;
|
cout<<"Error: unknow miscellaneous, that shouldn't happen: "<<type<<endl;;
|
||||||
exit(-1);
|
// exit(-1);
|
||||||
}
|
}
|
||||||
} else if (type=="comment") {
|
} else if (type=="comment") {
|
||||||
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
|
||||||
auto it=find(rec_list.begin(),rec_list.end(),text);
|
auto it=find(rec_list.begin(),rec_list.end(),text);
|
||||||
if (it==rec_list.end()) {
|
if (it==rec_list.end()) {
|
||||||
cout<<"Error: comment in reccurences map not found: "<<text<<endl;;
|
if (it==rec_list.end()) {
|
||||||
exit(-1);
|
if (!text.empty()) {
|
||||||
|
CCC_ADD_COMPOMENT(out,generate_string_content(text));
|
||||||
|
print_debug("string for comment("+type+"): "+text);
|
||||||
|
} else {
|
||||||
|
cout<<"Warning: unknow node is empty: "<<text<<endl;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
size_t index=distance(rec_list.begin(),it);
|
||||||
|
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
||||||
|
print_debug("rec_table for string for comment ("+type+"): "+text);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
size_t index=distance(rec_list.begin(),it);
|
size_t index=distance(rec_list.begin(),it);
|
||||||
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
||||||
print_debug("rec_table for comment");
|
print_debug("rec_table for comment");
|
||||||
}
|
}
|
||||||
} else if (type=="\"") {
|
|
||||||
if (i+1<nodes->size()) {
|
|
||||||
if (string(ts_node_type(nodes->at(i+1)))=="\"") {
|
|
||||||
auto it=find(delimiter.begin(),delimiter.end(),"");
|
|
||||||
size_t index=distance(delimiter.begin(),it);
|
|
||||||
CCC_ADD_COMPOMENT(out,generate_delimiter(index));
|
|
||||||
print_debug("double quotes mark, inserting delimiter for empty string");
|
|
||||||
i++;
|
|
||||||
} else {
|
|
||||||
CCC_ADD_COMPOMENT(out,CCC_QUOTE);
|
|
||||||
print_debug("single quote mark");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
|
auto it=find(rec_list.begin(),rec_list.end(),text);
|
||||||
cout<<"Error: unknow node type: "<<type<<endl;
|
if (it==rec_list.end()) {
|
||||||
cout<<"Error: unknow node text: "<<text<<endl;
|
if (!text.empty()) {
|
||||||
exit(-1);
|
CCC_ADD_COMPOMENT(out,generate_string_content(text));
|
||||||
|
print_debug("string for unknow node ("+type+"): "+text);
|
||||||
|
} else {
|
||||||
|
cout<<"Warning: unknow node is empty: "<<text<<endl;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
size_t index=distance(rec_list.begin(),it);
|
||||||
|
CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
|
||||||
|
print_debug("rec_table for string for unknow node ("+type+"): "+text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vector<unsigned char> payload_bytes;
|
vector<unsigned char> payload_bytes;
|
||||||
@@ -517,6 +487,7 @@ void construct_rec_table(vector<string> &files_content,vector<string> files_name
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
int main(int argc,char **argv) {
|
int main(int argc,char **argv) {
|
||||||
|
cout<<c_keywords.size()<<endl;
|
||||||
if (argc<2) {
|
if (argc<2) {
|
||||||
cout<<"Usage: ccc [FILES]"<<endl;
|
cout<<"Usage: ccc [FILES]"<<endl;
|
||||||
return -1;
|
return -1;
|
||||||
|
|||||||
7
hello.c
7
hello.c
@@ -2,13 +2,8 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
typedef static unsigned char HEY;
|
typedef static unsigned char HEY;
|
||||||
// hello
|
// hello
|
||||||
// hello
|
|
||||||
// hello
|
|
||||||
// hello
|
|
||||||
// hello
|
|
||||||
// hello
|
|
||||||
int main() {
|
int main() {
|
||||||
hello[]="hello";
|
char hello[]="hello";
|
||||||
HEY res=8;
|
HEY res=8;
|
||||||
if (res!=9) {
|
if (res!=9) {
|
||||||
printf(hello);
|
printf(hello);
|
||||||
|
|||||||
BIN
linux_sources.tar.xz
Normal file
BIN
linux_sources.tar.xz
Normal file
Binary file not shown.
69
test.py
Normal file
69
test.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
|
||||||
|
def get_source_files(root_dir):
|
||||||
|
"""Récupère les fichiers et calcule la taille totale."""
|
||||||
|
source_files = []
|
||||||
|
total_size = 0
|
||||||
|
for root, _, files in os.walk(root_dir):
|
||||||
|
for file in files:
|
||||||
|
if file.endswith(('.c', '.h')) and len(source_files)<10000:
|
||||||
|
path = os.path.join(root, file)
|
||||||
|
source_files.append(path)
|
||||||
|
total_size += os.path.getsize(path)
|
||||||
|
return source_files, total_size
|
||||||
|
|
||||||
|
def main():
|
||||||
|
target_dir = "linux"
|
||||||
|
if not os.path.exists(target_dir):
|
||||||
|
print(f"Erreur: Le dossier {target_dir} n'existe pas.")
|
||||||
|
return
|
||||||
|
|
||||||
|
print(f"--- Analyse de {target_dir} ---")
|
||||||
|
files, total_raw_size = get_source_files(target_dir)
|
||||||
|
raw_mo = total_raw_size / (1024 * 1024)
|
||||||
|
print(f"Fichiers trouvés : {len(files)}")
|
||||||
|
print(f"Taille totale brute : {raw_mo:.2f} Mo")
|
||||||
|
|
||||||
|
# 1. Compression avec TAR
|
||||||
|
print("\n--- Lancement de TAR -cJf (XZ) ---")
|
||||||
|
start_tar = time.time()
|
||||||
|
tar_cmd = ["tar", "-cJf", "linux_sources.tar.xz", "--files-from=-"]
|
||||||
|
process_tar = subprocess.Popen(tar_cmd, stdin=subprocess.PIPE)
|
||||||
|
process_tar.communicate(input="\n".join(files).encode())
|
||||||
|
end_tar = time.time()
|
||||||
|
|
||||||
|
# 2. Compression avec CCC
|
||||||
|
print("\n--- Lancement de CCC (Output temps réel) ---")
|
||||||
|
print("-" * 40)
|
||||||
|
start_ccc = time.time()
|
||||||
|
try:
|
||||||
|
# On laisse stdout et stderr par défaut pour voir l'output de CCC
|
||||||
|
subprocess.run(["./ccc"] + files, check=True)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"\nErreur fatale CCC : {e}")
|
||||||
|
except OSError as e:
|
||||||
|
print(f"\nErreur système (trop de fichiers ?) : {e}")
|
||||||
|
return
|
||||||
|
end_ccc = time.time()
|
||||||
|
print("-" * 40)
|
||||||
|
|
||||||
|
# 3. Calculs finaux
|
||||||
|
print("\n" + "="*40)
|
||||||
|
print(f" RÉSULTATS (Source: {raw_mo:.2f} Mo)")
|
||||||
|
print("="*40)
|
||||||
|
|
||||||
|
for name, filename in [("TAR.XZ", "linux_sources.tar.xz"), ("CCC", "test.ccc")]:
|
||||||
|
if os.path.exists(filename):
|
||||||
|
size_mo = os.path.getsize(filename) / (1024 * 1024)
|
||||||
|
ratio = (size_mo / raw_mo) * 100
|
||||||
|
print(f"{name:10} : {size_mo:8.2f} Mo ({ratio:5.2f}% du total)")
|
||||||
|
else:
|
||||||
|
print(f"{name:10} : Non généré")
|
||||||
|
|
||||||
|
print(f"\nTemps TAR : {end_tar - start_tar:.2f}s")
|
||||||
|
print(f"Temps CCC : {end_ccc - start_ccc:.2f}s")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user