improvements incoming

2026-02-06 00:01:23 +01:00
parent f281c71f75
commit 51ae29a898
6 changed files with 236 additions and 201 deletions
--- a/build.sh
+++ b/build.sh
@@ -1 +1 @@
-g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma
+g++ ccc.cpp -o ccc -ltree-sitter -ltree-sitter-c -llzma -Ofast -march=native
--- a/ccc.cpp
+++ b/ccc.cpp
@@ -14,56 +14,35 @@
 #include <lzma.h>
 using namespace std;
 namespace fs=filesystem;
-const vector<bool> CCC_C_KEYYORD_HEAD {0,0,0};
-const vector<bool> CCC_SPACE {0,1,1,1,0,0,1};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IF {0,0,1,0,0,0};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IFDEF {0,0,1,0,0,1};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_IFNDEF {0,0,1,0,1,0};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELSE {0,0,1,0,1,1};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIF {0,0,1,1,0,0};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIFDEF {0,0,1,1,0,1};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ELIFNDEF {0,0,1,1,1,0};
-const vector<bool> CCC_PREPROCESSOR_CONDITIONAL_ENDIF {0,0,1,1,1,1};
-const vector<bool> CCC_PREPROCESSOR_OTHER_DEFINE {0,1,0,0,0,0};
-const vector<bool> CCC_PREPROCESSOR_OTHER_UNDEF {0,1,0,0,0,1};
-const vector<bool> CCC_PREPROCESSOR_OTHER_INCLUDE {0,1,0,0,1,0};
-const vector<bool> CCC_PREPROCESSOR_OTHER_ERROR {0,1,0,0,1,1};
-const vector<bool> CCC_PREPROCESSOR_OTHER_WARNING {0,1,0,1,0,0};
-const vector<bool> CCC_PREPROCESSOR_OTHER_PRAGMA {0,1,0,1,0,1};
-const vector<bool> CCC_PREPROCESSOR_OTHER_LINE {0,1,0,1,1,0};
-const vector<bool> CCC_QUOTE {0,1,0,1,1,1};
-const vector<bool> CCC_DELIMITER_HEAD {0,1,1};
-const vector<bool> CCC_OTHER_GRAMMAR_HEAD {1,0,0};
-const vector<bool> CCC_MISCELLANEOUS_HEAD {1,0,1};
-const vector<bool> CCC_REC_TABLE_REF_HEAD {1,1,0};
-const vector<bool> CCC_STRING_ASCII {1,1,1,0};
-const vector<bool> CCC_STRING_UTF8 {1,1,1,1};
-const vector<bool> CCC_STRING_END_ASCII {0,0,0,0,0,0,0};
-const vector<bool> CCC_STRING_END_UTF8 {0,0,0,0,0,0,0,0};
+const vector<bool> CCC_DELIMITER_0_HEAD={0};
+const vector<bool> CCC_DELIMITER_1_HEAD={1,0};
+const vector<bool> CCC_C_KEYWORD_HEAD={1,1,0,0};
+const vector<bool> CCC_MISCELANEOUS_HEAD={1,1,0,1};
+const vector<bool> CCC_STRING_INLINE_HEAD={1,1,1,0};
+const vector<bool> CCC_REC_TABLE_REF_HEAD={1,1,1,1};
+const vector<bool> CCC_STRING_INLINE_END={0,0,0,0,0,0,0,0};
 #define CCC_ADD_COMPOMENT(vec,tail) \
  do { \
    auto tmp=tail; \
    vec.insert(vec.end(),tmp.begin(),tmp.end()); \
  } while (0)
-const vector<string> delimiter={
-  "\n",
-  "\t",
+const vector<string> delimiter0={
  "{",
  "}",
  "(",
  ")",
  "[",
  "]",
-  " ",
-  "{}",
-  "()",
-  "[]",
-  "",
-  ";",
  ",",
  "."
 };
-const vector<string> other_grammer={
+const vector<string> delimiter1={
+  "{}",
+  "()",
+  "[]",
+  ";"
+};
+const vector<string> miscellaneous={
  "!",
  "%",
  "'",
@@ -79,9 +58,7 @@ const vector<string> other_grammer={
  "^",
  "|",
  "&",
-  "~"
-};
-const vector<string> miscellaneous={
+  "~",
  "+=",
  "-=",
  "*=",
@@ -116,6 +93,21 @@ const vector<string> miscellaneous={
  "int64_t"
 };
 const vector<string> c_keywords={
+  "#if",
+  "#ifdef",
+  "#ifndef",
+  "#else",
+  "#elif",
+  "#elifdef",
+  "#elifndef",
+  "#endif",
+  "#define",
+  "#undef",
+  "#include",
+  "#error",
+  "#warning",
+  "#pragma",
+  "#line",
  "alignas",
  "alignof",
  "auto",
@@ -162,7 +154,8 @@ const vector<string> c_keywords={
  "volatile",
  "while",
  "__asm__",
-  "__attribute__"
+  "__attribute__",
+  "defined",
 };
 struct symbol {
  string name;
@@ -210,17 +203,9 @@ vector<bool> byte_to_bits(unsigned char c) {
  }
  return out;
 }
-vector<bool> ascii_to_bits(unsigned char c) {
-  vector<bool> out;
-  for (int i=6;i>=0;i--) {
-    bool enabled=(c>>i)&0x01;
-    out.push_back(enabled);
-  }
-  return out;
-}
 vector<bool> generate_c_keyword(size_t index) {
  vector<bool> out;
-  CCC_ADD_COMPOMENT(out,CCC_C_KEYYORD_HEAD);
+  CCC_ADD_COMPOMENT(out,CCC_C_KEYWORD_HEAD);
  for (int i=5;i>=0;i--) {
    bool enabled=(index>>i)&0x01;
    out.push_back(enabled);
@@ -241,19 +226,19 @@ vector<bool> generate_rec(size_t index,size_t total_recs) {
  }
  return out;
 }
-vector<bool> generate_delimiter(size_t index) {
+vector<bool> generate_delimiter0(size_t index) {
  vector<bool> out;
-  CCC_ADD_COMPOMENT(out,CCC_DELIMITER_HEAD);
-  for (int i=3;i>=0;i--) {
+  CCC_ADD_COMPOMENT(out,CCC_DELIMITER_0_HEAD);
+  for (int i=2;i>=0;i--) {
    bool enabled=(index>>i)&0x01;
    out.push_back(enabled);
  }
  return out;
 }
-vector<bool> generate_other_grammar(size_t index) {
+vector<bool> generate_delimiter1(size_t index) {
  vector<bool> out;
-  CCC_ADD_COMPOMENT(out,CCC_OTHER_GRAMMAR_HEAD);
-  for (int i=3;i>=0;i--) {
+  CCC_ADD_COMPOMENT(out,CCC_DELIMITER_1_HEAD);
+  for (int i=1;i>=0;i--) {
    bool enabled=(index>>i)&0x01;
    out.push_back(enabled);
  }
@@ -261,8 +246,8 @@ vector<bool> generate_other_grammar(size_t index) {
 }
 vector<bool> generate_miscellaneous(size_t index) {
  vector<bool> out;
-  CCC_ADD_COMPOMENT(out,CCC_MISCELLANEOUS_HEAD);
-  for (int i=4;i>=0;i--) {
+  CCC_ADD_COMPOMENT(out,CCC_MISCELANEOUS_HEAD);
+  for (int i=5;i>=0;i--) {
    bool enabled=(index>>i)&0x01;
    out.push_back(enabled);
  }
@@ -270,26 +255,11 @@ vector<bool> generate_miscellaneous(size_t index) {
 }
 vector<bool> generate_string_content(string str) {
  vector<bool> out;
-  bool is_utf8=false;
+  CCC_ADD_COMPOMENT(out,CCC_STRING_INLINE_HEAD);
  for (auto c:str) {
-    if (c>127) {
-      is_utf8=true;
-      break;
-    }
-  }
-  if (is_utf8) {
-    CCC_ADD_COMPOMENT(out,CCC_STRING_UTF8);
-    for (auto c:str) {
-      CCC_ADD_COMPOMENT(out,byte_to_bits(c));
-    }
-    CCC_ADD_COMPOMENT(out,CCC_STRING_END_UTF8);
-  } else {
-    CCC_ADD_COMPOMENT(out,CCC_STRING_ASCII);
-    for (auto c:str) {
-      CCC_ADD_COMPOMENT(out,ascii_to_bits(c));
-    }
-    CCC_ADD_COMPOMENT(out,CCC_STRING_END_ASCII);
+    CCC_ADD_COMPOMENT(out,byte_to_bits(c));
  }
+  CCC_ADD_COMPOMENT(out,CCC_STRING_INLINE_END);
  return out;
 }
 void print_debug(string text) {
@@ -301,148 +271,143 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
  vector<bool> out;
  for (int i=0;i<nodes->size();i++) {
    string type=string(ts_node_type(nodes->at(i)));
-    string supertext=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
-    if (type=="#if") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IF);
-      print_debug("if");
-    } else if (type=="#ifdef") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IFDEF);
-      print_debug("ifdef");
-    } else if (type=="#ifndef") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_IFNDEF);
-      print_debug("ifndef");
-    } else if (type=="#else") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELSE);
-      print_debug("else");
-    } else if (type=="#elif") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIF);
-      print_debug("elif");
-    } else if (type=="#elifdef") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIFDEF);
-      print_debug("elifdef");
-    } else if (type=="#elifndef") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ELIFNDEF);
-      print_debug("elifndef");
-    } else if (type=="#endif") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_CONDITIONAL_ENDIF);
-      print_debug("endif");
-    } else if (type=="#define") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_DEFINE);
-      print_debug("define");
-    } else if (type=="#undef") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_UNDEF);
-      print_debug("undef");
-    } else if (type=="#include") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_INCLUDE);
-      print_debug("include");
-    } else if (type=="#error") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_ERROR);
-      print_debug("error");
-    } else if (type=="#warning") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_WARNING);
-      print_debug("warning");
-    } else if (type=="#pragma" || (type=="preproc_directive" && supertext=="#pragma")) {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_PRAGMA);
-      print_debug("pragma");
-    } else if (type=="#line") {
-      CCC_ADD_COMPOMENT(out,CCC_PREPROCESSOR_OTHER_LINE);
-      print_debug("line");
-    } else if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="type_identifier" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
-      string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
+    string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
+    if (type=="string_content" || type=="system_lib_string" || type=="identifier" || type=="number_literal" || type=="field_identifier" || type=="preproc_arg" || type=="escape_sequence" || type=="character" || type=="statement_identifier") {
      auto it=find(rec_list.begin(),rec_list.end(),text);
      if (it==rec_list.end()) {
-        if (!text.empty()) {
-          string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
-          CCC_ADD_COMPOMENT(out,generate_string_content(text));
-          print_debug("string ("+type+"): "+text);
-        } else {
-          auto it=find(delimiter.begin(),delimiter.end(),"");
-          size_t index=distance(delimiter.begin(),it);
-          CCC_ADD_COMPOMENT(out,generate_delimiter(index));
-          print_debug("delimiter for empty string");
-        }
+        string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
+        CCC_ADD_COMPOMENT(out,generate_string_content(text));
+        print_debug("string ("+type+"): "+text);
      } else {
        size_t index=distance(rec_list.begin(),it);
        CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
        print_debug("rec_table for string ("+type+"): "+text);
      }
-    } else if (type=="primitive_type") {
+    } else if (type=="primitive_type" || type=="type_identifier") {
      string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
      auto it=find(c_keywords.begin(),c_keywords.end(),text);
      if (it!=c_keywords.end()) {
        size_t index=distance(c_keywords.begin(),it);
        CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
-        print_debug("primitive_type: "+text);
+        print_debug("type found in c keyword: "+text);
      } else {
        auto it=find(rec_list.begin(),rec_list.end(),text);
        if (it==rec_list.end()) {
          if (!text.empty()) {
            CCC_ADD_COMPOMENT(out,generate_string_content(text));
-            print_debug("string ("+type+"): "+text);
+            print_debug("string for type ("+type+"): "+text);
          } else {
-            cout<<"Error: provided primitive is empty: "<<text<<endl;;
-            exit(-1);
+            cout<<"Warning: provided primitive is empty: "<<text<<endl;
          }
        } else {
          size_t index=distance(rec_list.begin(),it);
          CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
-          print_debug("rec_table for string ("+type+"): "+text);
+          print_debug("rec_table for string for type ("+type+"): "+text);
        }
      }
-    } else if (find(delimiter.begin(),delimiter.end(),type)!=delimiter.end()) {
-      string text;
+    } else if (find(delimiter0.begin(),delimiter0.end(),type)!=delimiter0.end() || find(delimiter1.begin(),delimiter1.end(),type)!=delimiter1.end() || type=="\"") {
+      string insert;
      if (type=="(" && i+1<nodes->size()) {
        if (string(ts_node_type(nodes->at(i+1)))==")") {
-          text="()";
+          insert="()";
          i++;
        } else {
-          text="(";
+          insert="(";
        }
      } else if (type=="[" && i+1<nodes->size()) {
        if (string(ts_node_type(nodes->at(i+1)))=="]") {
-          text="[]";
+          insert="[]";
          i++;
        } else {
-          text="[";
+          insert="[";
        }
      } else if (type=="{" && i+1<nodes->size()) {
        if (string(ts_node_type(nodes->at(i+1)))=="}") {
-          text="{}";
+          insert="{}";
          i++;
        } else {
-          text="{";
+          insert="{";
        }
      } else {
-        text=type;
+        insert=type;
      }
-      auto it=find(delimiter.begin(),delimiter.end(),text);
-      if (it!=delimiter.end()) {
-        size_t index=distance(delimiter.begin(),it);
-        CCC_ADD_COMPOMENT(out,generate_delimiter(index));
-        print_debug("delimiter: "+text);
+      auto it=find(delimiter0.begin(),delimiter0.end(),insert);
+      if (it!=delimiter0.end()) {
+        size_t index=distance(delimiter0.begin(),it);
+        CCC_ADD_COMPOMENT(out,generate_delimiter0(index));
+        print_debug("delimiter 0: "+insert);
      } else {
-        cout<<"Error: unknow delimiter, that shouldn't happen: "<<text<<endl;;
-        exit(-1);
+        if (insert!="{}" && insert!="\"") {
+          auto it=find(delimiter1.begin(),delimiter1.end(),insert);
+          if (it!=delimiter1.end()) {
+            size_t index=distance(delimiter1.begin(),it);
+            CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
+            print_debug("delimiter 1: "+insert);
+          } else {
+            cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
+            // exit(-1);
+          }
+        } else {
+          if (insert=="{}") {
+            auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
+            if (it!=delimiter1.end()) {
+              size_t index=distance(delimiter1.begin(),it);
+              CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
+              CCC_ADD_COMPOMENT(out,{0});
+              print_debug("delimiter 1: "+insert);
+            } else {
+              cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
+              // exit(-1);
+            }
+          } else if (insert=="\"") {
+            auto it=find(delimiter1.begin(),delimiter1.end(),"{}");
+            if (it!=delimiter1.end()) {
+              size_t index=distance(delimiter1.begin(),it);
+              CCC_ADD_COMPOMENT(out,generate_delimiter1(index));
+              CCC_ADD_COMPOMENT(out,{1});
+              print_debug("delimiter 1: "+insert);
+            } else {
+              cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
+              exit(-1);
+            }
+          } else {
+            cout<<"Error: unknow delimiter, that shouldn't happen: "<<insert<<endl;;
+            // exit(-1);
+          }
+        }
      }
-    } else if (find(other_grammer.begin(),other_grammer.end(),type)!=other_grammer.end()) {
-      auto it=find(other_grammer.begin(),other_grammer.end(),type);
-      if (it!=other_grammer.end()) {
-        size_t index=distance(other_grammer.begin(),it);
-        CCC_ADD_COMPOMENT(out,generate_other_grammar(index));
-        print_debug("other grammar: "+type);
+    } else if (find(c_keywords.begin(),c_keywords.end(),type)!=c_keywords.end() || type=="preproc_directive") {
+      if (type!="preproc_directive") {
+        auto it=find(c_keywords.begin(),c_keywords.end(),type);
+        if (it!=c_keywords.end()) {
+          size_t index=distance(c_keywords.begin(),it);
+          CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
+          print_debug("c keyword: "+type);
+        } else {
+          cout<<"Error: unknow C keyword, that shouldn't happen: "<<type<<" "<<text<<endl;;
+          // exit(-1);
+        }
      } else {
-        cout<<"Error: unknow other grammar symbol, that shouldn't happen: "<<type<<endl;;
-        exit(-1);
-      }
-    } else if (find(c_keywords.begin(),c_keywords.end(),type)!=c_keywords.end()) {
-      auto it=find(c_keywords.begin(),c_keywords.end(),type);
-      if (it!=c_keywords.end()) {
-        size_t index=distance(c_keywords.begin(),it);
-        CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
-        print_debug("c keyword: "+type);
-      } else {
-        cout<<"Error: unknow C keyword, that shouldn't happen: "<<type<<endl;;
-        exit(-1);
+        auto it=find(c_keywords.begin(),c_keywords.end(),text);
+        if (it!=c_keywords.end()) {
+          size_t index=distance(c_keywords.begin(),it);
+          CCC_ADD_COMPOMENT(out,generate_c_keyword(index));
+          print_debug("c keyword: "+type);
+        } else {
+          auto it=find(rec_list.begin(),rec_list.end(),text);
+          if (it==rec_list.end()) {
+            if (!text.empty()) {
+              CCC_ADD_COMPOMENT(out,generate_string_content(text));
+              print_debug("string for c keyword ("+type+"): "+text);
+            } else {
+              cout<<"Warning: C keyword is empty: "<<text<<endl;
+            }
+          } else {
+            size_t index=distance(rec_list.begin(),it);
+            CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
+            print_debug("rec_table for string for c keyword ("+type+"): "+text);
+          }
+        }
      }
    } else if (find(miscellaneous.begin(),miscellaneous.end(),type)!=miscellaneous.end()) {
      auto it=find(miscellaneous.begin(),miscellaneous.end(),type);
@@ -452,37 +417,42 @@ vector<unsigned char> process_file_nodes(vector<TSNode> *nodes,string code,vecto
        print_debug("miscellaneous: "+type);
      } else {
        cout<<"Error: unknow miscellaneous, that shouldn't happen: "<<type<<endl;;
-        exit(-1);
+        // exit(-1);
      }
    } else if (type=="comment") {
-      string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
      auto it=find(rec_list.begin(),rec_list.end(),text);
      if (it==rec_list.end()) {
-        cout<<"Error: comment in reccurences map not found: "<<text<<endl;;
-        exit(-1);
+        if (it==rec_list.end()) {
+          if (!text.empty()) {
+            CCC_ADD_COMPOMENT(out,generate_string_content(text));
+            print_debug("string for comment("+type+"): "+text);
+          } else {
+            cout<<"Warning: unknow node is empty: "<<text<<endl;
+          }
+        } else {
+          size_t index=distance(rec_list.begin(),it);
+          CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
+          print_debug("rec_table for string for comment ("+type+"): "+text);
+        }
      } else {
        size_t index=distance(rec_list.begin(),it);
        CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
        print_debug("rec_table for comment");
      }
-    } else if (type=="\"") {
-      if (i+1<nodes->size()) {
-        if (string(ts_node_type(nodes->at(i+1)))=="\"") {
-          auto it=find(delimiter.begin(),delimiter.end(),"");
-          size_t index=distance(delimiter.begin(),it);
-          CCC_ADD_COMPOMENT(out,generate_delimiter(index));
-          print_debug("double quotes mark, inserting delimiter for empty string");
-          i++;
-        } else {
-          CCC_ADD_COMPOMENT(out,CCC_QUOTE);
-          print_debug("single quote mark");
-        }
-      }
    } else {
-      string text=code.substr(ts_node_start_byte(nodes->at(i)),ts_node_end_byte(nodes->at(i))-ts_node_start_byte(nodes->at(i)));
-      cout<<"Error: unknow node type: "<<type<<endl;
-      cout<<"Error: unknow node text: "<<text<<endl;
-      exit(-1);
+      auto it=find(rec_list.begin(),rec_list.end(),text);
+      if (it==rec_list.end()) {
+        if (!text.empty()) {
+          CCC_ADD_COMPOMENT(out,generate_string_content(text));
+          print_debug("string for unknow node ("+type+"): "+text);
+        } else {
+          cout<<"Warning: unknow node is empty: "<<text<<endl;
+        }
+      } else {
+        size_t index=distance(rec_list.begin(),it);
+        CCC_ADD_COMPOMENT(out,generate_rec(index,rec_list.size()));
+        print_debug("rec_table for string for unknow node ("+type+"): "+text);
+      }
    }
  }
  vector<unsigned char> payload_bytes;
@@ -517,6 +487,7 @@ void construct_rec_table(vector<string> &files_content,vector<string> files_name
  }
 }
 int main(int argc,char **argv) {
+  cout<<c_keywords.size()<<endl;
  if (argc<2) {
    cout<<"Usage: ccc [FILES]"<<endl;
    return -1;
--- a/hello.c
+++ b/hello.c
@@ -2,13 +2,8 @@
 #include <stdint.h>
 typedef static unsigned char HEY;
 // hello
-// hello
-// hello
-// hello
-// hello
-// hello
 int main() {
-  hello[]="hello";
+  char hello[]="hello";
  HEY res=8;
  if (res!=9) {
    printf(hello);
--- a/linux_sources.tar.xz
+++ b/linux_sources.tar.xz
--- a/test.ccc
+++ b/test.ccc
--- a/test.py
+++ b/test.py
@@ -0,0 +1,69 @@
+import os
+import subprocess
+import time
+
+def get_source_files(root_dir):
+    """Walk root_dir, collect up to 10000 .c/.h file paths, and return (paths, total size in bytes)."""
+    source_files = []
+    total_size = 0
+    for root, _, files in os.walk(root_dir):
+        for file in files:
+            if file.endswith(('.c', '.h')) and len(source_files)<10000:
+                path = os.path.join(root, file)
+                source_files.append(path)
+                total_size += os.path.getsize(path)
+    return source_files, total_size
+
+def main():
+    target_dir = "linux"
+    if not os.path.exists(target_dir):
+        print(f"Erreur: Le dossier {target_dir} n'existe pas.")
+        return
+
+    print(f"--- Analyse de {target_dir} ---")
+    files, total_raw_size = get_source_files(target_dir)
+    raw_mo = total_raw_size / (1024 * 1024)
+    print(f"Fichiers trouvés : {len(files)}")
+    print(f"Taille totale brute : {raw_mo:.2f} Mo")
+
+    # 1. Compress with tar (-cJf, xz); the newline-separated file list is fed to tar on stdin
+    print("\n--- Lancement de TAR -cJf (XZ) ---")
+    start_tar = time.time()
+    tar_cmd = ["tar", "-cJf", "linux_sources.tar.xz", "--files-from=-"]
+    process_tar = subprocess.Popen(tar_cmd, stdin=subprocess.PIPE)
+    process_tar.communicate(input="\n".join(files).encode())
+    end_tar = time.time()
+
+    # 2. Compress with CCC, passing every collected path as a command-line argument
+    print("\n--- Lancement de CCC (Output temps réel) ---")
+    print("-" * 40)
+    start_ccc = time.time()
+    try:
+        # Leave stdout and stderr at their defaults so CCC's output stays visible
+        subprocess.run(["./ccc"] + files, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"\nErreur fatale CCC : {e}")
+    except OSError as e:
+        print(f"\nErreur système (trop de fichiers ?) : {e}")
+        return
+    end_ccc = time.time()
+    print("-" * 40)
+
+    # 3. Final results: each archive's size relative to the raw source size, then timings
+    print("\n" + "="*40)
+    print(f"      RÉSULTATS (Source: {raw_mo:.2f} Mo)")
+    print("="*40)
+
+    for name, filename in [("TAR.XZ", "linux_sources.tar.xz"), ("CCC", "test.ccc")]:
+        if os.path.exists(filename):
+            size_mo = os.path.getsize(filename) / (1024 * 1024)
+            ratio = (size_mo / raw_mo) * 100
+            print(f"{name:10} : {size_mo:8.2f} Mo ({ratio:5.2f}% du total)")
+        else:
+            print(f"{name:10} : Non généré")
+
+    print(f"\nTemps TAR : {end_tar - start_tar:.2f}s")
+    print(f"Temps CCC : {end_ccc - start_ccc:.2f}s")
+
+if __name__ == "__main__":
+    main()