From 4455c445725f68c6764d0e8b731d2d68db13beb9 Mon Sep 17 00:00:00 2001
From: bloodstalker <thabogre@gmail.com>
Date: Sat, 9 Dec 2017 20:43:40 +0330
Subject: obfuscator can remove comments now. it can handle some hairy stuff.
 it's better than vim's syntax highlighter anyways

---
 obfuscator/README.md      | 11 ++++---
 obfuscator/obfuscator.cpp | 73 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 57 insertions(+), 27 deletions(-)

(limited to 'obfuscator')
diff --git a/obfuscator/README.md b/obfuscator/README.md
index 585c054..1fff79f 100644
--- a/obfuscator/README.md
+++ b/obfuscator/README.md
@@ -3,16 +3,19 @@
 obfuscator is a C/C++ source-code obfuscation tool.<br/>
 
 ## Status
-obfuscator is not feature-complete yet. Below you can find a list of the implemented features and the ones that will be implemented.<br/>
+Below you can find a list of the features that are already implemented and the ones that are planned.<br/>
 If you have suggestions or recommendations for features to add please make an issue with the `obfuscator` label.<br/>
 
 ### Implemented Features
 * Identifier Obfuscation: Swaps the name of all identifiers with their hash. <br/>
+* Comment Deletion: Deletes all comments.<br/>
 
 ### Future Features
 * Obfuscation Exclusion List: obfuscator will accept a list of idenftifiers and their namespace and will not obfuscate those. This feature is added so the user can refrain from obfuscating the standard library.<br/>
-* Whitespace Deletion: Pretty much kills all whitespace where it doesn't change the syntax.<br/>
-* Comment Deletion: Deletes all comments.<br/>
+* Support whole directories and multiple files as input.<br/>
+* Provide an option to choose which hashing function to use.<br/>
 
 ## Notes
-* Currently the hash function that is being used is `std::hash<>`. The GCC implementation will be probably the default option since the digest is shorter than 32 characters long. The decision was made since quite a few embedded C/C++ compilers can't correctly handle identifiers longer than 32 characters(implementation limitations).<br/>
+* Currently the hash function being used is `std::hash<>`. The GCC implementation will probably be the default option since its digest is shorter than 32 characters. This decision was made because quite a few embedded C/C++ compilers can't correctly handle identifiers longer than 32 characters.<br/>
+* If you want source code that is excluded by preprocessor conditionals evaluating to false to be removed, either use your compiler to get the output of the preprocessor and pass that to `obfuscator`, or simply leave that code out of the source: `obfuscator` does not look at anything the preprocessor does not include in the source code.<br/>
+* Removing whitespace from the source code is a moot point, since it can be reverted simply by running something like `clang-format` on the output; the feature is therefore not currently included in `obfuscator`.<br/>
diff --git a/obfuscator/obfuscator.cpp b/obfuscator/obfuscator.cpp
index a028955..f3a1b16 100644
--- a/obfuscator/obfuscator.cpp
+++ b/obfuscator/obfuscator.cpp
@@ -234,8 +234,10 @@ public:
     std::cout << "Macro name: " << macroname << " Hash: " << hash << " New ID: " << newname << "\n";
 #endif
 
-    //std::string dummy = Rewrite.getRewrittenText(SourceRange(MacroNameTok.getLocation(), MacroNameTok.getLocation().getLocWithOffset(MacroNameTok.getLength())));
-    //std::cout << dummy << "\n";
+#ifdef DBG
+    std::string dummy = Rewrite.getRewrittenText(SourceRange(MacroNameTok.getLocation(), MacroNameTok.getLocation().getLocWithOffset(MacroNameTok.getLength())));
+    std::cout << dummy << "\n";
+#endif
     Rewrite.ReplaceText(SourceRange(MacroNameTok.getLocation(), MacroNameTok.getLocation().getLocWithOffset(MacroNameTok.getLength())), newname);
   }
 
@@ -310,35 +312,59 @@ class CommentWiper {
 
     int run(void) {
       for (auto &filepath : sourcelist) {
-        //std::regex comment1("//.+");
-        //std::regex comment2("/\\*.+\\*/");
-        //std::regex comment2("/\\*[^]+\\*/");
-        //std::regex multibegin("/\\*.+(?!\\*/)");
-        //std::regex multiend("(?<!/\\*).+\\*/");
-        //std::smatch result;
-        //bool multiline;
-        bool slash, backslash, quote, star, multiline;
-        unsigned int ch_prv;
-
         std::ifstream sourcefile;
         sourcefile.open("../test/bruisertest/obfuscator-tee");
         std::ofstream dupe;
         dupe.open("./dupe.cpp");
         std::string line;
+
+        int d_quote = 0;
+        bool skip = false;
+
         while (std::getline(sourcefile, line)) {
+          std::string dummy;
           line += "\n";
           for (unsigned int ch = 0; ch < line.length(); ++ch) {
-            if (line[ch] == atoi("/")) slash = true;
-            if (line[ch] == atoi("\\")) backslash = true;
-            if (line[ch] == atoi("\"")) quote = true;
-            if (line[ch] == atoi("*")) star = true;
-            ch_prv = ch;
+            if (!skip) {
+              if ((line[ch] == '\"' || line[ch] == '\'')) {
+                if (ch > 1) {
+                  if (line[ch - 1] != '\\') {
+                    d_quote++;
+                    if (d_quote % 1 == 1) {
+                      continue;
+                    }
+                  }
+                }
+              }
+            }
+            if (line[ch] == '/') {
+              if (ch > 1) {
+                if (line[ch - 1] == '/' && !skip) {
+                  if (dummy.length() > 1) {
+                    dummy.erase(dummy.length() - 1);
+                  }
+                  break;
+                }
+                if (line[ch - 1] == '*') {
+                  skip = false;
+                  continue;
+                }
+              }
+              if (ch < line.length() - 1) {
+                if (line[ch + 1] == '*' && !skip) {
+                  skip = true;
+                  continue;
+                }
+              }
+            }
+            if (!skip) dummy.push_back(line[ch]);
           }
-          //if (std::regex_search(line, result, comment1)) {std::cout << "11111" << result.str() << "\n";}
-          //if (std::regex_search(line, result, comment2)) {std::cout << "22222" << result.str() << "\n";}
-          //if (std::regex_search(line, result, multibegin)) {std::cout << "33333" << result.str() << "\n";}
-          //if (std::regex_search(line, result, multiend)) {std::cout << "44444" << result.str() << "\n";}
-          dupe << line << "\n";
+          d_quote = 0;
+          if (dummy.length() > 1) {
+            if (dummy[dummy.length() - 1] != '\n') {dummy.push_back('\n');}
+          }
+          dupe << dummy;
+          dummy.clear();
         }
         sourcefile.close();
         dupe.close();
@@ -350,6 +376,7 @@ class CommentWiper {
     std::vector<std::string> sourcelist;
 };
 /**********************************************************************************************************************/
+/*@DEVI-not in a hurry to implement this: whitespace removal can be reverted simply by running something like clang-format.*/
 class WhitespaceWarper {
   public:
     WhitespaceWarper(std::vector<std::string> SourceList) : sourcelist(SourceList) {}
@@ -359,7 +386,7 @@ class WhitespaceWarper {
         std::ifstream sourcefile;
         sourcefile.open("../test/bruisertest/obfuscator-tee");
         std::ofstream dupe;
-        dupe.open("./dupe.cpp");
+        dupe.open("./dupe2.cpp");
         std::string line;
         while (std::getline(sourcefile, line)) {
           for (auto &character : line) {
-- 
cgit v1.2.3