summary refs log tree commit diff stats
path: root/travnik.py
blob: 98ef4492f4c1a70b50888455f4abce57394ed5b4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from bencodepy import decode
from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import search, IGNORECASE
class Type(Enum):
	"""Metainfo layout of a torrent, as recorded in Torrent.type."""
	# NOTE: no trailing commas here -- "UNDEF = 0," would make the member's
	# value the tuple (0,) instead of the integer 0, so Type(0) would fail
	# and the members would have inconsistent value types.
	UNDEF = 0   # initial value, before anything was parsed
	V1 = 1      # info dict carries a b'files' list, or is a plain single file
	V2 = 2      # presumably: info dict carries only a b'file tree' (BitTorrent v2)
	HYBRID = 3  # info dict carries both b'files' and b'file tree'
class Torrent():
	"""Parsed view of one bencoded torrent blob.

	Attributes:
		sha1, sha256 -- digests of the raw bytes sliced out as the info dict
		                (empty bytes until parse() has run)
		dict         -- the decoded top-level dictionary; parse() removes the
		                b'pieces' and b'files' entries from its b'info' dict,
		                and b'file tree' too unless b'files' was also present
		files        -- nested dict mapping a name (bytes) either to a size
		                or to another dict of the same shape (a directory)
		type         -- a Type member describing the layout that was found
	"""
	def __init__(self):
		self.sha1 = b''
		self.sha256 = b''
		self.dict = {}
		self.files = {}
		self.type = Type.UNDEF
	@staticmethod
	def _decode(raw):
		"""Decode bytes as UTF-8, falling back to ISO-8859-2 when that fails."""
		try:
			return raw.decode()
		except UnicodeDecodeError:
			return raw.decode("iso-8859-2") # TODO we could try detecting the encoding
	@staticmethod
	def _is_padding(file):
		"""Tell whether an entry of the b'files' list is a padding file.

		An entry counts as padding when its b'attr' value contains b'p', when
		its joined path contains b'padding.file', or when one of its path
		components is exactly b'.pad'.
		"""
		attr = file.get(b'attr')
		path = file.get(b'path')
		return (attr is not None and b'p' in attr) or b'padding.file' in b'/'.join(path) or b'.pad' in path
	@staticmethod
	def _insert_file(tree, path, length):
		"""Store length in the nested dict tree under the components of path.

		Intermediate directories are created as needed. An empty path raises
		IndexError.
		"""
		leaf = path[-1]
		for name in path[:-1]:
			if name not in tree.keys():
				tree[name] = {}
			tree = tree[name]
		tree[leaf] = length
	def file(self, f):
		"""Read the torrent file at path f and parse its contents."""
		# the context manager closes the handle even if read() raises
		with open(f, "rb") as handle:
			data = handle.read()
		self.parse(data)
	def parse(self, b):
		"""Parse the bencoded bytes b and fill in the digests, dict, files and type.

		Raises whatever the bencode decoder raises on malformed input; input
		without an b'info' dictionary fails with TypeError.
		"""
		# The raw info dict is taken to start right after the first b'4:info'
		# and to end before the last b'6:sourced2:ip' (a b'source' key whose
		# value is a dict starting with key b'ip' -- presumably appended by
		# the program that stored the file; confirm). When that marker is
		# absent rfind() returns -1, so the slice just drops the final byte.
		# NOTE(review): if b'4:info' is absent find() returns -1 and the
		# slice silently starts at offset 5.
		infodict = b[b.find(b'4:info')+6:b.rfind(b'6:sourced2:ip')]
		self.sha1 = sha1(infodict).digest()
		self.sha256 = sha256(infodict).digest()
		self.dict = decode(b)
		info = self.dict.get(b'info')
		if b'pieces' in info:
			info.pop(b'pieces')
		if b'files' in info.keys():
			self.type = Type.V1
			for file in info.get(b'files'):
				if self._is_padding(file):
					continue
				self._insert_file(self.files, file.get(b'path'), file.get(b'length'))
			info.pop(b'files')
		if b'file tree' in info.keys(): # some torrents have broken file trees so we use files first
			if self.type is Type.V1:
				# both layouts present; the tree built from b'files' is kept
				# and b'file tree' stays in self.dict
				self.type = Type.HYBRID
			else:
				def filetree(names):
					r = {}
					for key in names.keys():
						if key == b'':
							# an empty key marks a file: its value holds the length
							return names.get(key).get(b'length')
						r[key] = filetree(names.get(key))
					return r
				# only the file tree is present, so record the layout as V2
				# (previously the type was left at UNDEF in this branch)
				self.type = Type.V2
				self.files = filetree(info.get(b'file tree'))
				info.pop(b'file tree')
		if not len(self.files):
			# neither layout produced entries: treat it as a single file
			self.type = Type.V1
			self.files[info.get(b'name')] = info.get(b'length')
		first_filename = next(iter(self.files))
		if len(self.files) == 1 and self.files[first_filename] == {}:
			# a lone empty directory: replace it with the top-level length
			print("fixed bad single file torrent", self.sha1.hex())
			self.files[first_filename] = info.get(b'length')
	def paths(self):
		"""Yield (path, size) for every non-directory entry of self.files.

		path is a list of the bytes components leading to the entry. Only
		dict values are descended into, so an entry whose size is missing
		(None) is yielded as-is instead of raising.
		"""
		def paths_r(d, path):
			for name, value in d.items():
				z = path + [name]
				if isinstance(value, dict):
					yield from paths_r(value, z)
				else:
					yield z, value
		yield from paths_r(self.files, [])
	def matches(self, r):
		"""Return True if regex r matches the torrent name or any file path.

		Matching is case-insensitive; file paths are joined with '/'. A
		torrent without a b'name' entry is only matched by its file paths.
		"""
		info = self.dict.get(b'info') or {}
		name = info.get(b'name')
		if name is not None and search(r, self._decode(name), IGNORECASE):
			return True
		for path, size in self.paths():
			if search(r, self._decode(b'/'.join(path)), IGNORECASE):
				return True
		return False
	def matching_files(self, r, decode=False):
		"""Return the part of self.files whose names match regex r.

		Each name is matched on its own (case-insensitively), not as a full
		path. A matching entry is included with its content; a directory is
		included with only its matching descendants whenever it has any. With
		decode=True the keys of the result are str instead of bytes (the
		content of a directory that matched by name only is copied unchanged).
		"""
		def matching_files_r(dirc):
			files = {}
			for name, content in dirc.items():
				decoded = self._decode(name)
				key = decoded if decode else name
				if search(r, decoded, IGNORECASE):
					files[key] = content
				if isinstance(content, dict):
					inhalt = matching_files_r(content)
					if inhalt:
						# matching descendants take precedence over the whole directory
						files[key] = inhalt
			return files
		return matching_files_r(self.files)
	def __repr__(self):
		return str(self.__dict__)
	def __hash__(self):
		# hash by the sha1 digest once it is known, by identity before that
		if len(self.sha1):
			return int.from_bytes(self.sha1, byteorder="big")
		return id(self)
def glob(d):
	"""Parse every regular file named *.torrent directly inside directory d.

	Returns a dict mapping each torrent's sha1 digest (bytes) to its Torrent
	object; if two files share a digest, the one scanned later wins.
	Sub-directories are not descended into.
	"""
	r = {}
	# the with-block closes the directory iterator even when parsing a file raises
	with scandir(d) as entries:
		for f in entries:
			if f.name.endswith(".torrent") and f.is_file():
				t = Torrent()
				t.file(f.path)
				r[t.sha1] = t
	return r