base64

base64编码导成二进制文件

1
2
3
4
5
6
7
8
9
10
11
12
import base64
import binascii

# Base64 text to convert. "xxx" is a placeholder: replace it with real,
# correctly padded base64 data, otherwise b64decode() raises binascii.Error.
clzBytecodeBase64Str = "xxx"
# b64decode() returns the decoded payload as a bytes object.
newstr = base64.b64decode(clzBytecodeBase64Str)
print(type(newstr))

# Write the decoded bytes unchanged; 'wb' opens the file in binary mode.
with open('somefile.bin', 'wb') as f:
    f.write(newstr)

print("输出成功")

后台扫描

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding:utf-8 -*-
import urllib
import time
import threading

# Prompt for the base URL to scan and for the wordlist file of candidate paths.
# NOTE(review): raw_input exists only in Python 2; on Python 3 it is input().
url = raw_input("输入你要扫描的网址:")
txt = raw_input("输入字典(php.txt):")

# open_url collects the URLs that answered with an accepted status code;
# all_url holds every candidate URL built from the wordlist.
open_url = []
all_url = []

# Worker threads started by main(), kept so they can be joined later.
threads = []

def search_url(url, txt):
    """Append ``url + entry`` to ``all_url`` for every entry in the wordlist.

    Args:
        url: Base URL that each wordlist entry is appended to.
        txt: Path of the wordlist file, one candidate path per line.

    Line endings are removed with ``rstrip('\\r\\n')`` so that wordlists
    saved with Windows (CRLF) line endings do not leave a stray ``\\r`` in
    the URL. Empty lines are skipped so the bare base URL is not queued.
    """
    with open(txt, 'r') as f:
        for line in f:
            entry = line.rstrip('\r\n')
            if not entry:
                continue
            all_url.append(url + entry)

def handle_url(urllist):
    """Request one candidate URL and record it when it answers 200 or 301.

    Args:
        urllist: The full URL to request (a single URL, despite the name).

    Matching URLs are appended to the module-level ``open_url`` list.
    Any failure while requesting the URL is treated as "not found" and the
    URL is skipped, so one bad entry does not abort the scan.
    """
    # NOTE(review): urllib.urlopen exists only in Python 2; on Python 3 the
    # equivalent lives in urllib.request.
    print ("find:"+urllist+'\n')
    try:
        req = urllib.urlopen(urllist)
        try:
            status = req.getcode()
        finally:
            # Always release the connection, even if reading the status fails.
            req.close()
    except Exception:
        # Deliberate best-effort: unreachable or failing URLs are ignored.
        return

    if status in (200, 301):
        open_url.append(urllist)


def main():
    """Build the candidate list, probe every URL in its own thread, report.

    Reads the module-level ``url`` and ``txt`` inputs, fills ``all_url`` via
    ``search_url``, starts one thread per candidate running ``handle_url``,
    waits for all of them, then prints the URLs collected in ``open_url``.
    """
    search_url(url,txt)

    # Start one worker thread per candidate URL.
    for each in all_url:
        t = threading.Thread(target=handle_url, args=(each,))
        threads.append(t)
        t.start()

    # Wait until every worker has finished before reporting.
    for t in threads:
        t.join()

    # Report either the hits or that nothing was found.
    if open_url:
        print ("扫描成功,网站后台地址为:")
        for each in open_url:
            print ("[+]"+each)
    else:
        print ("没有扫到网站后台,字典不给力啊")

if __name__ == "__main__":
    # Report elapsed wall-clock time for the whole scan. time.time() is used
    # instead of time.clock(): clock() measured CPU time on Unix (close to
    # zero for a network-bound scan) and was removed in Python 3.8.
    start = time.time()
    main()
    end = time.time()
    print ("The function spend time is %.3f seconds" %(end-start))

进制转换

hex-str

1. 字符串转 hex 字符串

字符串 >> 二进制 >> hex >> hex 字符串

1
2
3
def str_to_hexStr(string):
    """Return the hexadecimal text of ``string``'s UTF-8 encoding.

    Args:
        string: The text to convert.

    Returns:
        A str of lowercase hex digits, two per encoded byte.
    """
    str_bin = string.encode('utf-8')
    # hexlify() yields bytes of ASCII hex digits; decode them back to str.
    return binascii.hexlify(str_bin).decode('utf-8')

2. hex 字符串转字符串

hex 字符串 >> hex >> 二进制 >> 字符串

1
2
3
4
def hexStr_to_str(hex_str):
    """Decode hexadecimal text back into the UTF-8 string it represents.

    Args:
        hex_str: A str with an even number of hex digits.

    Returns:
        The text obtained by decoding the represented bytes as UTF-8.

    Raises:
        binascii.Error: If ``hex_str`` is not valid hexadecimal text.
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    # Local is named hex_bytes (not ``hex``) so the builtin is not shadowed.
    hex_bytes = hex_str.encode('utf-8')
    str_bin = binascii.unhexlify(hex_bytes)
    return str_bin.decode('utf-8')

bytes-str

str>>bytes

1
2
3
4
5
# Three ways to get the UTF-8 bytes of a str; each assignment to b produces
# the same value.
a = "demo"
b = a.encode("utf-8")
# bytes() applied to an existing bytes object gives an equal bytes value.
b = bytes(a.encode("utf-8"))
# bytes() can also encode a str directly when an encoding is supplied.
b = bytes(a,encoding="utf-8")
# prints b'demo'
print(b)

bytes>>str

1
c = b.decode("utf-8")  # or: c = str(b, encoding="utf-8")

十进制–十六进制

十六进制–>十进制

1
2
3
# Hexadecimal string -> int. With base 16 the '0x' prefix is optional.
int('0xa',16)         #10
int('ff',16) #255
int('f',16) #15

十进制–>十六进制

1
2
3
# int -> hexadecimal string; hex() always includes the '0x' prefix.
hex(10)    #'0xa'
hex(11) #'0xb'
hex(15) #'0xf'

十进制–二进制

十进制–>二进制

1
2
3
# int -> binary string; bin() always includes the '0b' prefix.
bin(2)                #'0b10'
bin(255) #'0b11111111'
bin(8) #'0b1000'

二进制–>十进制

1
# Binary string -> int. With base 2 the '0b' prefix is optional. Result: 255
int('0b11111111', 2)

十进制–八进制

十进制–>八进制

1
2
3
# int -> octal string; oct() always includes the '0o' prefix.
oct(8)                #'0o10'
oct(7) #'0o7'
oct(9) #'0o11'

八进制–>十进制

1
# Octal string -> int. With base 8 the '0o' prefix is optional. Result: 9
int('0o11', 8)

字符串转换成整数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Decimal string -> int
int('10') #10
int('20') #20
# Hexadecimal string -> int
int('f',16) #15
int('ff',16) #255
int('a',16) #10
int('11',16) #17
# Binary string -> int
int('1000',2) #8
int('1001',2) #9
# Bytes -> ints: '>II' reads two big-endian unsigned 32-bit integers
import struct
struct.unpack('>II',bytes(b'\x01\x00\x00\x00\x01\x00\x00\x00')) #(16777216, 16777216)
# Ints -> bytes, using the same '>II' layout
struct.pack('>II',16777216, 16777216) #b'\x01\x00\x00\x00\x01\x00\x00\x00'

字符串转字节串

1
2
3
4
5
6
7
8
9
10
# Encode a str into bytes
'123abc'.encode('ascii') #b'123abc'
'123abc'.encode('utf-8') #b'123abc'
'123abc好'.encode('utf-8') #b'123abc\xe5\xa5\xbd'
# Hexadecimal digit string -> bytes
bytes.fromhex('ff011a') #b'\xff\x01\x1a'
# str of escaped characters -> bytes, one byte per character code point
# (the input is not a hex digit string; every code point must be below 256)
bytes(map(ord,'\xff\x01\x1a')) #b'\xff\x01\x1a'
# List of integer values (written as hex literals) -> bytes
bytes([0x01,0x02,0x03,0xff]) #b'\x01\x02\x03\xff'

字节串转字符串

1
2
3
4
5
6
7
# Decode bytes into a str
bytes(b'123abc\xe5\xa5\xbd').decode('utf-8') #'123abc好'
# bytes -> its printable repr text (non-ASCII bytes appear as \xNN escapes);
# slicing [2:-1] drops the leading b' and the trailing '
str(bytes(b'123abc\xe5\xa5\xbd')) #"b'123abc\\xe5\\xa5\\xbd'"
str(bytes(b'123abc\xe5\xa5\xbd'))[2:-1] #'123abc\\xe5\\xa5\\xbd'
# bytes -> list of hex strings, one per byte
[hex(i) for i in bytes(b'123')] #['0x31', '0x32', '0x33']

字符串–二进制串(01形式表示)

1
2
3
4
5
6
7
8
9
def encode(s):
    """Return the code point of each character of ``s`` in binary.

    Args:
        s: The text to convert.

    Returns:
        Space-separated binary numbers without the '0b' prefix, one per
        character; an empty string for empty input.
    """
    return ' '.join(format(ord(c), 'b') for c in s)


def decode(s):
    """Rebuild the text from whitespace-separated binary code points.

    Args:
        s: Text produced by ``encode``.

    Returns:
        The decoded string. Empty input decodes to an empty string.

    Raises:
        ValueError: If a group is not a valid binary number.
    """
    # split() with no argument ignores repeated whitespace and returns an
    # empty list for an empty string, so encode('') round-trips.
    return ''.join(chr(int(bits, 2)) for bits in s.split())


encode('hello')
# '1101000 1100101 1101100 1101100 1101111'
decode('1101000 1100101 1101100 1101100 1101111')
# 'hello'
1
2
3
4
>>> bin(int('256', 10))
'0b100000000'
>>> str(int('0b100000000', 2))
'256'

selenium 实现绕过前端js加密和验证码识别

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from selenium import webdriver
from time import sleep
from PIL import Image
import ddddocr

# NOTE(review): passing the driver path positionally and the
# find_element_by_* helpers belong to older Selenium releases — confirm the
# installed Selenium version still provides them.
driver = webdriver.Chrome("G:\\pytest\\chromedriver.exe")
driver.get("http://www.xxx.xxx.cn")
sleep(1)
# Locate the user-name field by element id and type the user name.
UserName_input = driver.find_element_by_id('UserName')
UserName_input.send_keys("admin")
# Locate the password field by element id and type the password.
Password_input = driver.find_element_by_id('Password')
Password_input.send_keys("admin@12345")

# Captcha option 1 (alternative, for a captcha served from its own URL):
# code = driver.find_element_by_id("SecurityCode").get_attribute("src")
# print(code) # http://www.xmis.org.cn:8085/SecurityCode.jsp?Height=50&Width=150

# Captcha option 2: screenshot the whole page, then crop the captcha region.
driver.get_screenshot_as_file('G:\\pytest\\screenshot.png')
# Crop box in screenshot pixels; adjust to the page under test.
left = 800
top = 500
right = left + 214
bottom = top + 77
photo = Image.open('G:\\pytest\\screenshot.png')
photo = photo.crop((left, top, right, bottom))
photo.save('G:\\pytest\\full_code.png')
ocr = ddddocr.DdddOcr()
# Read back the file that was just saved. The original opened the relative
# name 'full_code.png', which only resolves to the same file when the
# working directory happens to be G:\pytest.
with open('G:\\pytest\\full_code.png', 'rb') as f:
    img_bytes = f.read()
res = ocr.classification(img_bytes)
print(res)

# Locate the captcha field by element id and type the recognised text.
VerifyCode_input = driver.find_element_by_id('VerifyCode')
VerifyCode_input.send_keys(res)

# Click the login control.
Login_click = driver.find_element_by_xpath('//*[@id="LoginImg"]')
Login_click.click()

# Capture the resulting page source so the login outcome can be inspected.
try:
    response = [driver.page_source]
except Exception as ex:
    print("出现如下异常{}".format(ex))

爬取网站下载href标签内的文件并重命名

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import requests
import re
import wget
import os
from pathlib import Path

# Request headers sent with every listing-page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    'Cookie': 'PHPSESSID=69fk4bobxxxxxxxxbjm20; XXZ_ZLK_admin_username=username',
}
url = 'https://www.xxxxx.cn/admin.html'
session = requests.Session()
# Raw <a ...>...</a> fragments whose text contains the download URL prefix.
list_urls = []
# Directory the files are downloaded into and renamed within.
path = 'E:\\xx要的全部测试'

# Step 1: walk the listing pages and collect the matching anchor tags.
for page in range(7, 8):
    print('************************************正在爬取第{}*********************************'.format(page))
    Target_url = 'https://www.xxxxx.cn/admin/means/index.html?page={}'.format(page)
    response = session.get(Target_url, headers=headers)
    print('页面url==='+str(Target_url)+'状态码'+str(response.status_code))

    content = response.text
    urls = re.findall(r"<a.*?href=.*?<\/a>", content, re.I | re.S | re.M)
    keywords = "https://yyy.xxxxx.cn/means/file/"
    r_url = 0
    # The loop variable is named 'anchor' so it does not overwrite the
    # module-level 'url' defined above.
    for anchor in urls:
        if keywords in anchor:
            r_url += 1
            print('正在写入第{}'.format(r_url))
            list_urls.append(anchor)


# Step 2: download each linked file and rename it after the link text.
# The patterns are loop-invariant, so they are defined once here.
file_url_res = r"(?<=href=\").+?(?=\")|(?<=href=\').+?(?=\')"
file_name_res = r"(?<=>).*?(?=<)"
n_urls = 0
for i in list_urls:
    file_url = re.findall(file_url_res, i, re.I | re.S | re.M)
    file_name = re.findall(file_name_res, i, re.I | re.S | re.M)
    n_urls += 1
    print("开始下载第" + str(n_urls) + "条")

    # An anchor with an unquoted href yields no URL match; skip it instead
    # of failing with IndexError.
    if not file_url:
        print("skipped: no quoted href found in anchor")
        continue

    filename = wget.download(file_url[0], path)

    # The text pattern can match empty strings (e.g. between nested tags),
    # so take the first non-blank piece of link text as the new name.
    display_name = next((t.strip() for t in file_name if t.strip()), None)
    if display_name is None:
        # No usable link text: keep the name chosen by the download.
        continue

    filename_suffix = os.path.splitext(filename)[-1]
    try:
        os.rename(filename, os.path.join(path, display_name + filename_suffix))
    except OSError as exc:
        # Keep going, but report the failure rather than hiding it; the file
        # stays under its downloaded name.
        print("rename failed for {}: {}".format(filename, exc))

PDF文件拼接

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import os
from PyPDF2 import PdfMerger

# Folder containing the PDF files to merge.
pdf_folder = "C:\\Users\\xxx\\Desktop\\pdf"

# Collect the PDF file names. The extension check is case-insensitive and
# the list is sorted so the merge order does not depend on the arbitrary
# order returned by os.listdir().
pdf_files = sorted(
    name for name in os.listdir(pdf_folder) if name.lower().endswith(".pdf")
)

# Merger object that accumulates the documents.
pdf_merger = PdfMerger()

# Append each PDF in turn.
for name in pdf_files:
    with open(os.path.join(pdf_folder, name), 'rb') as f:
        pdf_merger.append(f)

# Write the merged document. The original pointed output_file at the input
# folder itself, which cannot be opened for writing. The result is written
# next to that folder so a re-run does not pick it up as an input.
output_file = "C:\\Users\\xxx\\Desktop\\merged.pdf"
with open(output_file, 'wb') as f:
    pdf_merger.write(f)

# Release the merger's resources.
pdf_merger.close()

print("PDF文件已成功合并为:", output_file)