resolves 27

author: bloodstalker <thabogre@gmail.com> 2018-08-10 00:32:09 +0000
committer: bloodstalker <thabogre@gmail.com> 2018-08-10 00:32:09 +0000
commit: ce225cf41a59a3ae9366b0a0770c6f97a6f4329e (patch)
tree: 725f06f377c25d66d0c18469e959ba0eefa5b109 /obfuscator
parent: WIP-added keccak-tiny to obfuscator (diff)
download: mutator-ce225cf41a59a3ae9366b0a0770c6f97a6f4329e.tar.gz
mutator-ce225cf41a59a3ae9366b0a0770c6f97a6f4329e.zip
4 files changed, 49 insertions, 25 deletions
diff --git a/obfuscator/README.md b/obfuscator/README.md
index 04d62e5..8cb0973 100644
--- a/obfuscator/README.md
+++ b/obfuscator/README.md
@@ -8,21 +8,22 @@ If you have suggestions or recommendations for features to add, please make an i
 When an alpha version is ready, `obfuscator` will get its own mirror repo.<br/>
 
 ### Implemented Features
-* Identifier Obfuscation: Swaps the name of all identifiers with their hash. <br/>
+* Identifier Obfuscation: Swaps the name of all identifiers with their hash.<br/>
 * Comment Deletion: Deletes all comments.<br/>
+* SHAKE: you can choose to use SHAKE128 or SHAKE256 as the hashing function.<br/>
 
 ### Future Features
 * Obfuscation Exclusion List: obfuscator will accept a list of idenftifiers and their namespace and will not obfuscate those. This feature is added so the user can refrain from obfuscating the standard library.<br/>
 * Support directory-wide and multiple files as input.<br/>
-* Provide an option to choose which hashing function to use. Ideally these alternative hashing algorithms would have variable length digests. Ther is also the idea of using different seeds to generate different digests for the same identifier names across each obfuscation.<br/>
-* Windows support<br/>
 
 ### Running the Test
 running `run.sh` should do the trick. Do note that you need to regenerate the compilation database for the test under the `test` directory to work. You could use `bear`. If you already have `bear`, just run `make clean && bear make`.<br/>
 
 ## Notes
-* Currently the hash function that is being used is `std::hash<>`. The GCC implementation will be probably the default option since the digest is shorter than 32 characters long. The decision was made since quite a few embedded C/C++ compilers can't correctly handle identifiers longer than 32 characters.<br/>
 * Obfuscator uses the Clang Frontend(CFE) libraries. Most embedded hardware require the use of their custom toolchains and that can result in clang complaining about custom pragmas.<br/>
 * If you want the preprocessor conditionals that evaluate to false that pertain to source code inclusion to be removed, use your compiler to get the output of the preprocessor and then pass that to `obfuscator` or just outright don't include them in the source code since `obfuscator` will not look through things that are not included in the source code by the preprocessor.<br/>
 * Getting rid of the whitespaces in the source code is a moot point since reverting it is as easy as running something like `clang-format` on it, so the feature is not currently included in obfuscator.<br/>
 * At a leter point in time, obfuscator will be moved to a mirror repo of it's own or the mirror might become the main repo.<br/>
+
+## Thanks
+obfuscator uses [keccak-tiny](https://github.com/coruus/keccak-tiny).<br/>
diff --git a/obfuscator/obfuscator.cpp b/obfuscator/obfuscator.cpp
index e28cb55..7920535 100644
--- a/obfuscator/obfuscator.cpp
+++ b/obfuscator/obfuscator.cpp
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.*
 #include <cstdlib>
 #include <dirent.h>
 #include <fstream>
+#include <iomanip>
 #include <iostream>
 #include <string>
 #include <vector>
@@ -57,6 +58,8 @@ namespace {
   static llvm::cl::OptionCategory ObfuscatorCat("Obfuscator custom options");
   std::string TMP_FILE = "";
 }
+cl::opt<uint32_t> SHAKE("shake", cl::desc("if set, the hashing algorithm changes to shake. the value determines whether to use shake128 or shake256."), cl::init(0), cl::cat(ObfuscatorCat), cl::ZeroOrMore);
+cl::opt<uint32_t> SHAKE_LEN("shake_len", cl::desc("length of the shake hash, the digest length will be twice this value."), cl::init(0), cl::cat(ObfuscatorCat), cl::ZeroOrMore);
 /**********************************************************************************************************************/
 //#define DBG
 // @DEVI-FIXME
@@ -70,9 +73,34 @@ namespace {
 #define TEMP_FILE "/tmp/obfuscator-tee"
 #endif
 /**********************************************************************************************************************/
+/**
+ * @brief Hashes an identifier name: std::hash by default, SHAKE128/SHAKE256 when --shake is 128/256.
+ * @param name the identifier to hash.
+ * @return the decimal std::hash value, or 2 * SHAKE_LEN hex digits; empty for any other --shake value.
+ */
+std::string hashWrapper(std::string name) {
+  const uint32_t bits = SHAKE;
+  const uint32_t len = SHAKE_LEN;
+  if (bits == 0) return std::to_string(std::hash<std::string>{}(name));
+  if (bits != 128 && bits != 256) return "";
+  if (len > bits) {
+    std::cout << "you have selected shake" << bits << " but the length is greater than " << bits << ".cant do it.\n";
+  }
+  // sized from the requested length: the old fixed 256-byte buffer overflowed for larger values.
+  std::vector<uint8_t> out(len);
+  const uint8_t* in = reinterpret_cast<const uint8_t*>(name.data());
+  if (bits == 128) shake128(out.data(), len, in, name.length());
+  else shake256(out.data(), len, in, name.length());
+  std::stringstream dummy_stream;
+  // two zero-padded digits per byte; unpadded output had variable length and could collide.
+  dummy_stream << std::hex << std::setfill('0');
+  for (const uint8_t byte : out) dummy_stream << std::setw(2) << int(byte);
+  return dummy_stream.str();
+}
+
 /**
  * @brief Gets the list of all directories and sub-directories starting from a base directory.
- * @param _path where the the base directory is. 
+ * @param _path where the base directory is.
  * @return Returns the list of all found dirs.
  * @warning WIP
  */
@@ -138,8 +166,7 @@ std::string nameMaker(std::string _name, std::string _extension, std::string _ex
  * @return Returns the new name.
  */
 std::string getHashedName(std::string _name) {
-  std::size_t hash = std::hash<std::string>{}(_name);
-  return "FILE" + std::to_string(hash);
+  return "FILE" + hashWrapper(_name);
 }
 
 /**
@@ -205,6 +232,7 @@ std::string getTempDir2() {
 
   return tmpdir_;
 }
+
 /**********************************************************************************************************************/
 /**
  * @brief MatchCallback for CallExpr.
@@ -217,8 +245,7 @@ class CalledFunc : public MatchFinder::MatchCallback {
       if (MR.Nodes.getNodeAs<clang::CallExpr>("calledfunc") != nullptr) {
         const CallExpr *CE = MR.Nodes.getNodeAs<clang::CallExpr>("calledfunc");
         std::string name = CE->getDirectCallee()->getNameInfo().getAsString();
-        std::size_t hash = std::hash<std::string>{}(name);
-        std::string newname = "ID" + std::to_string(hash);
+        std::string newname = "ID" + hashWrapper(name);
 #ifdef DBG
         std::cout << "CallExpr name: "  << name << " Hash: " << hash << " New ID: " << newname << "\n";
 #endif
@@ -246,8 +273,7 @@ class CalledVar : public MatchFinder::MatchCallback {
       if (MR.Nodes.getNodeAs<clang::DeclRefExpr>("calledvar") != nullptr) {
         const DeclRefExpr* DRE = MR.Nodes.getNodeAs<clang::DeclRefExpr>("calledvar");
         auto name = DRE->getNameInfo().getAsString();
-        std::size_t hash = std::hash<std::string>{}(name);
-        std::string newname = "ID" + std::to_string(hash);
+        std::string newname = "ID" + hashWrapper(name);
 #ifdef DBG
         std::cout << "DeclRefExpr name: "  << name << " Hash: " << hash << " New ID: " << newname << "\n";
 #endif
@@ -276,8 +302,7 @@ public:
       const FunctionDecl* FD = MR.Nodes.getNodeAs<clang::FunctionDecl>("funcdecl");
       std::string funcname = FD->getNameInfo().getAsString();
       if (funcname == "main") return void();
-      std::size_t hash = std::hash<std::string>{}(funcname);
-      std::string newname = "ID" + std::to_string(hash);
+      std::string newname = "ID" + hashWrapper(funcname);
 #ifdef DBG
       std::cout << "Function name: "  << funcname << " Hash: " << hash << " New ID: " << newname << "\n";
 #endif
@@ -306,8 +331,7 @@ public:
     if (MR.Nodes.getNodeAs<clang::VarDecl>("vardecl") != nullptr) {
       const VarDecl* VD = MR.Nodes.getNodeAs<clang::VarDecl>("vardecl");
       std::string varname = VD->getIdentifier()->getName().str();
-      std::size_t hash = std::hash<std::string>{}(varname);
-      std::string newname = "ID" + std::to_string(hash);
+      std::string newname = "ID" + hashWrapper(varname);
 #ifdef DBG
       std::cout << "Var name: "  << varname << " Hash: " << hash << " New ID: " << newname << "\n";
 #endif
@@ -347,8 +371,7 @@ class ClassDecl : public MatchFinder::MatchCallback {
         else {return void();}
         //auto TD = RD->getCanonicalDecl();
         std::string varname = RD->getIdentifier()->getName().str();
-        std::size_t hash = std::hash<std::string>{}(varname);
-        std::string newname = "ID" + std::to_string(hash);
+        std::string newname = "ID" + hashWrapper(varname);
 #ifdef DBG
         std::cout << "Record name: "  << varname << " Hash: " << hash << " New ID: " << newname << "\n";
 #endif
@@ -379,8 +402,7 @@ public:
     if (!SM.isWrittenInMainFile(SL)) return void();
     CheckSLValidity(SL);
     std::string macroname = MacroNameTok.getIdentifierInfo()->getName().str();
-    std::size_t hash = std::hash<std::string>{}(macroname);
-    std::string newname = "ID" + std::to_string(hash);
+    std::string newname = "ID" + hashWrapper(macroname);
 #ifdef DBG
     std::cout << "Macro name: " << macroname << " Hash: " << hash << " New ID: " << newname << "\n";
 #endif
@@ -398,8 +420,7 @@ public:
     if (!SM.isWrittenInMainFile(SL)) return void();
     CheckSLValidity(SL);
     std::string macroname = MacroNameTok.getIdentifierInfo()->getName().str();
-    std::size_t hash = std::hash<std::string>{}(macroname);
-    std::string newname = "ID" + std::to_string(hash);
+    std::string newname = "ID" + hashWrapper(macroname);
 
     Rewrite.ReplaceText(SourceRange(SL, SL.getLocWithOffset(MacroNameTok.getLength() - 1)), newname);
   }
@@ -619,8 +640,8 @@ int main(int argc, const char **argv) {
   int ret = Tool.run(newFrontendActionFactory<ObfFrontendAction>().get());
   CommentWiper CW(SourcePathList);
   CW.run();
-  dumpHashFilenames(hashFilenames(SourcePathList));
-  dumpDirList(listDirs("./test"));
+  //dumpHashFilenames(hashFilenames(SourcePathList));
+  //dumpDirList(listDirs("./test"));
 #endif
 #if 0
   for (auto &iter : SourcePathList) {
diff --git a/obfuscator/run.sh b/obfuscator/run.sh
index 68d0632..e988a1a 100755
--- a/obfuscator/run.sh
+++ b/obfuscator/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 cd $(dirname $0)
-"./obfuscator" ./test/test.cpp
-"./obfuscator" ./test/header.hpp --
+"./obfuscator" --shake 256 --shake_len 44 ./test/test.cpp
+"./obfuscator" --shake 256 --shake_len 44 ./test/header.hpp --
 "g++" ./FILE15118982290295364091.cpp
 #expected to return 128
 ./a.out
diff --git a/obfuscator/test/test.cpp b/obfuscator/test/test.cpp
index dc4b709..83c55e4 100644
--- a/obfuscator/test/test.cpp
+++ b/obfuscator/test/test.cpp
@@ -1,4 +1,5 @@
 #include "./header.hpp"
+//#include <iostream>
 
 #if 0
 #include <fstream>
@@ -59,6 +60,7 @@ int main(int argc, const char **argv)
 #endif
   int aa = 100;
   int bb = 28;
+  //std::cout << "maybe i passed the test.";
 
   INTMAC1;
   INTMAC2;
author	bloodstalker <thabogre@gmail.com>	2018-08-10 00:32:09 +0000
committer	bloodstalker <thabogre@gmail.com>	2018-08-10 00:32:09 +0000
commit	ce225cf41a59a3ae9366b0a0770c6f97a6f4329e (patch)
tree	725f06f377c25d66d0c18469e959ba0eefa5b109 /obfuscator
parent	WIP-added keccak-tiny to obfuscator (diff)
download	mutator-ce225cf41a59a3ae9366b0a0770c6f97a6f4329e.tar.gz mutator-ce225cf41a59a3ae9366b0a0770c6f97a6f4329e.zip