Files
CTF/HTB/forgot/ml_security.py
Simon 82b0759f1e init htb
old htb folders
2023-08-29 21:53:22 +02:00

150 lines
5.5 KiB
Python

vm#!/usr/bin/python3
import sys
import csv
import pickle
import mysql.connector
import requests
import threading
import numpy as np
import pandas as pd
import urllib.parse as parse
from urllib.parse import unquote
from sklearn import model_selection
from nltk.tokenize import word_tokenize
from sklearn.linear_model import LogisticRegression
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from tensorflow.python.tools.saved_model_cli import preprocess_input_exprs_arg_string
np.random.seed(42)
f1 = '/opt/security/lib/DecisionTreeClassifier.sav'
f2 = '/opt/security/lib/SVC.sav'
f3 = '/opt/security/lib/GaussianNB.sav'
f4 = '/opt/security/lib/KNeighborsClassifier.sav'
f5 = '/opt/security/lib/RandomForestClassifier.sav'
f6 = '/opt/security/lib/MLPClassifier.sav'
# load the models from disk
loaded_model1 = pickle.load(open(f1, 'rb'))
loaded_model2 = pickle.load(open(f2, 'rb'))
loaded_model3 = pickle.load(open(f3, 'rb'))
loaded_model4 = pickle.load(open(f4, 'rb'))
loaded_model5 = pickle.load(open(f5, 'rb'))
loaded_model6 = pickle.load(open(f6, 'rb'))
model= Doc2Vec.load("/opt/security/lib/d2v.model")
# Create a function to convert an array of strings to a set of features
def getVec(text):
features = []
for i, line in enumerate(text):
test_data = word_tokenize(line.lower())
v1 = model.infer_vector(test_data)
featureVec = v1
lineDecode = unquote(line)
lowerStr = str(lineDecode).lower()
feature1 = int(lowerStr.count('link'))
feature1 += int(lowerStr.count('object'))
feature1 += int(lowerStr.count('form'))
feature1 += int(lowerStr.count('embed'))
feature1 += int(lowerStr.count('ilayer'))
feature1 += int(lowerStr.count('layer'))
feature1 += int(lowerStr.count('style'))
feature1 += int(lowerStr.count('applet'))
feature1 += int(lowerStr.count('meta'))
feature1 += int(lowerStr.count('img'))
feature1 += int(lowerStr.count('iframe'))
feature1 += int(lowerStr.count('marquee'))
# add feature for malicious method count
feature2 = int(lowerStr.count('exec'))
feature2 += int(lowerStr.count('fromcharcode'))
feature2 += int(lowerStr.count('eval'))
feature2 += int(lowerStr.count('alert'))
feature2 += int(lowerStr.count('getelementsbytagname'))
feature2 += int(lowerStr.count('write'))
feature2 += int(lowerStr.count('unescape'))
feature2 += int(lowerStr.count('escape'))
feature2 += int(lowerStr.count('prompt'))
feature2 += int(lowerStr.count('onload'))
feature2 += int(lowerStr.count('onclick'))
feature2 += int(lowerStr.count('onerror'))
feature2 += int(lowerStr.count('onpage'))
feature2 += int(lowerStr.count('confirm'))
# add feature for ".js" count
feature3 = int(lowerStr.count('.js'))
# add feature for "javascript" count
feature4 = int(lowerStr.count('javascript'))
# add feature for length of the string
feature5 = int(len(lowerStr))
# add feature for "<script" count
feature6 = int(lowerStr.count('script'))
feature6 += int(lowerStr.count('<script'))
feature6 += int(lowerStr.count('&lt;script'))
feature6 += int(lowerStr.count('%3cscript'))
feature6 += int(lowerStr.count('%3c%73%63%72%69%70%74'))
# add feature for special character count
feature7 = int(lowerStr.count('&'))
feature7 += int(lowerStr.count('<'))
feature7 += int(lowerStr.count('>'))
feature7 += int(lowerStr.count('"'))
feature7 += int(lowerStr.count('\''))
feature7 += int(lowerStr.count('/'))
feature7 += int(lowerStr.count('%'))
feature7 += int(lowerStr.count('*'))
feature7 += int(lowerStr.count(';'))
feature7 += int(lowerStr.count('+'))
feature7 += int(lowerStr.count('='))
feature7 += int(lowerStr.count('%3C'))
# add feature for http count
feature8 = int(lowerStr.count('http'))
# append the features
featureVec = np.append(featureVec,feature1)
featureVec = np.append(featureVec,feature2)
featureVec = np.append(featureVec,feature3)
featureVec = np.append(featureVec,feature4)
featureVec = np.append(featureVec,feature5)
featureVec = np.append(featureVec,feature6)
featureVec = np.append(featureVec,feature7)
featureVec = np.append(featureVec,feature8)
features.append(featureVec)
return features
# Grab links
conn = mysql.connector.connect(host='localhost',database='app',user='diego',password='dCb#1!x0%gjq')
cursor = conn.cursor()
cursor.execute('select reason from escalate')
r = [i[0] for i in cursor.fetchall()]
conn.close()
data=[]
for i in r:
data.append(i)
Xnew = getVec(data)
#1 DecisionTreeClassifier
ynew1 = loaded_model1.predict(Xnew)
#2 SVC
ynew2 = loaded_model2.predict(Xnew)
#3 GaussianNB
ynew3 = loaded_model3.predict(Xnew)
#4 KNeighborsClassifier
ynew4 = loaded_model4.predict(Xnew)
#5 RandomForestClassifier
ynew5 = loaded_model5.predict(Xnew)
#6 MLPClassifier
ynew6 = loaded_model6.predict(Xnew)
# show the sample inputs and predicted outputs
def assessData(i):
score = ((.175*ynew1[i])+(.15*ynew2[i])+(.05*ynew3[i])+(.075*ynew4[i])+(.25*ynew5[i])+(.3*ynew6[i]))
if score >= .5:
try:
preprocess_input_exprs_arg_string(data[i],safe=False)
except:
pass
for i in range(len(Xnew)):
t = threading.Thread(target=assessData, args=(i,))
# t.daemon = True
t.start()