"""Load .torrent files into a searchable in-memory representation."""

from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import IGNORECASE, search

from bencodepy import decode


class Type(Enum):
    """Kind of metadata found in a parsed torrent."""

    # NOTE(review): the original source wrote ``UNDEF = 0,`` etc. with
    # trailing commas, so the first three values are one-element tuples while
    # HYBRID is a plain int.  The values are kept exactly as they were so that
    # ``.value`` and ``Type(...)`` lookups keep working for existing callers.
    UNDEF = (0,)
    V1 = (1,)
    V2 = (2,)
    HYBRID = 3


def _decode_text(raw):
    """Decode *raw* bytes as UTF-8, falling back to ISO-8859-2."""
    try:
        return raw.decode()
    except UnicodeDecodeError:
        # TODO we could try detecting the encoding
        return raw.decode("iso-8859-2")


def _skip_bencoded(data, pos):
    """Return the index just past the bencoded value that starts at *pos*.

    Raises ValueError when the data is truncated or not valid bencoding.
    Nesting is tracked with a counter instead of recursion so that deeply
    nested input cannot exhaust the interpreter stack.
    """
    depth = 0
    while True:
        lead = data[pos:pos + 1]
        if lead in (b'd', b'l'):
            depth += 1
            pos += 1
        elif lead == b'e':
            if depth == 0:
                raise ValueError("unexpected end marker")
            depth -= 1
            pos += 1
        elif lead == b'i':
            pos = data.index(b'e', pos) + 1
        elif lead.isdigit():
            colon = data.index(b':', pos)
            pos = colon + 1 + int(data[pos:colon])
            if pos > len(data):
                raise ValueError("truncated string")
        else:
            raise ValueError("invalid bencoding")
        if depth == 0:
            return pos


def _info_span(data):
    """Return ``(start, end)`` of the top-level ``info`` value in *data*.

    Returns None when *data* is not a bencoded dictionary, is malformed, or
    has no top-level ``info`` key.
    """
    try:
        if data[:1] != b'd':
            return None
        pos = 1
        while data[pos:pos + 1] != b'e':
            if not data[pos:pos + 1].isdigit():
                return None  # dictionary keys must be strings
            key_end = _skip_bencoded(data, pos)
            key = data[data.index(b':', pos) + 1:key_end]
            value_end = _skip_bencoded(data, key_end)
            if key == b'info':
                return key_end, value_end
            pos = value_end
    except ValueError:
        return None
    return None


def _is_padding(entry):
    """Tell whether an entry of the ``files`` list is a padding file."""
    attr = entry.get(b'attr')
    if attr is not None and b'p' in attr:
        return True
    path = entry.get(b'path')
    joined = b'/'.join(path)
    return (
        b'padding.file' in joined
        or b'.pad' in path  # a path component that is exactly ".pad"
        or b'_____padding_file_' in joined
    )


def _insert_file(tree, path, length):
    """Store *length* in the nested dict *tree* under the components of *path*."""
    *directories, leaf = path
    node = tree
    for part in directories:
        if part not in node:
            node[part] = {}
        node = node[part]
    node[leaf] = length


def _flatten_file_tree(tree):
    """Convert a ``file tree`` dictionary into nested ``{name: length}`` dicts.

    A node holding the empty key ``b''`` is treated as a file and replaced by
    the ``length`` stored under that key.
    """
    flat = {}
    for key, child in tree.items():
        if key == b'':
            return child.get(b'length')
        flat[key] = _flatten_file_tree(child)
    return flat


class Torrent:
    """One parsed torrent.

    Attributes set by :meth:`parse`:
        sha1, sha256: digests of the raw bencoded ``info`` value.
        dict: the decoded torrent, with ``pieces``, ``files`` and
            ``file tree`` removed from its ``info`` dictionary.
        files: nested dicts mapping path components to file lengths.
        type: a :class:`Type` member.
        hadpieces: whether ``info`` contained a ``pieces`` entry.
        cache: ``|``-separated searchable text, filled in by
            ``matches(..., cache=True)``.
    """

    def __init__(self):
        self.sha1 = b''
        self.sha256 = b''
        self.dict = {}
        self.files = {}
        self.type = Type.UNDEF
        self.cache = None
        self.hadpieces = False

    def file(self, f):
        """Read the file at path *f* and parse its contents."""
        with open(f, "rb") as handle:
            self.parse(handle.read())

    def parse(self, b):
        """Parse the bencoded torrent *b* (bytes) into this object.

        Any state left over from an earlier parse is discarded first.
        Exceptions raised by the decoder or caused by missing keys propagate
        to the caller.
        """
        self.files = {}
        self.type = Type.UNDEF
        self.cache = None
        self.hadpieces = False

        # Hash exactly the bytes of the top-level info value.  Walking the
        # bencoding keeps top-level keys that follow "info" out of the hash.
        span = _info_span(b)
        if span is None:
            # Legacy heuristic, kept for input the walker cannot handle.
            span = (b.find(b'4:info') + 6, b.rfind(b'6:sourced2:ip'))
        infodict = b[span[0]:span[1]]
        self.sha1 = sha1(infodict).digest()
        self.sha256 = sha256(infodict).digest()

        self.dict = decode(b)
        info = self.dict.get(b'info')
        if b'pieces' in info:
            info.pop(b'pieces')
            self.hadpieces = True

        if b'files' in info:
            self.type = Type.V1
            for entry in info.get(b'files'):
                if _is_padding(entry):
                    continue
                _insert_file(self.files, entry.get(b'path'), entry.get(b'length'))
            info.pop(b'files')

        if b'file tree' in info:
            # some torrents have broken file trees so we use files first
            if self.type is Type.V1:
                self.type = Type.HYBRID
            else:
                self.type = Type.V2
                self.files = _flatten_file_tree(info.get(b'file tree'))
            info.pop(b'file tree')

        if not self.files:
            # Neither a files list nor a usable file tree: single-file layout.
            self.type = Type.V1
            self.files[info.get(b'name')] = info.get(b'length')

        first_filename = next(iter(self.files))
        if self.type == Type.V2 and self.hadpieces:
            self.type = Type.HYBRID
        if len(self.files) == 1 and self.files[first_filename] == {}:
            print("fixed bad single file torrent", self.sha1.hex())
            self.files[first_filename] = info.get(b'length')

    def paths(self):
        """Yield ``(components, length)`` for every file in ``self.files``.

        *components* is a fresh list of the bytes path components.
        """
        def walk(tree, prefix):
            for name, content in tree.items():
                here = prefix + [name]
                if isinstance(content, int):
                    yield here, content
                else:
                    yield from walk(content, here)

        yield from walk(self.files, [])

    def matches(self, r, cache=False):
        """Return whether regex *r* matches this torrent, ignoring case.

        The source ip (when present), the torrent name and every file path
        are searched in that order.  With ``cache=True`` the searchable text
        is stored in ``self.cache`` on the first call and later calls search
        that stored string instead.  Always returns a bool.
        """
        if cache and self.cache:
            return bool(search(r, self.cache, IGNORECASE))

        name = self.dict.get(b'info').get(b'name')
        try:
            decoded = _decode_text(name)
        except AttributeError:
            # name is missing or not bytes
            decoded = str(name)

        # NOTE(review): "source"/"ip" looks like a non-standard addition made
        # by whatever produced these files - confirm.  Torrents without it
        # are searched by name and paths only.
        source = self.dict.get(b'source')
        ip = source.get(b'ip') if source is not None else None
        origin = ip.decode() if ip is not None else None

        def texts():
            if origin is not None:
                yield origin
            yield decoded
            for path, _size in self.paths():
                yield _decode_text(b'/'.join(path))

        if not cache:
            # Lazy: stops at the first field that matches.
            return any(search(r, text, IGNORECASE) for text in texts())

        fields = list(texts())
        # Keep the "ip|name|path|...|" layout even when there is no ip.
        lead = "" if origin is not None else "|"
        self.cache = lead + "".join(field + "|" for field in fields)
        return any(search(r, field, IGNORECASE) for field in fields)

    def matching_files(self, r, decode=False):
        """Return the part of ``self.files`` whose names match regex *r*.

        The result mirrors the nesting of ``self.files``.  A matching file
        keeps its length; a matching directory maps to its matching
        descendants, or to ``{}`` when it has none.  A directory that does
        not match itself is included only when it has matching descendants.
        With ``decode=True`` the keys are str instead of bytes.
        """
        def collect(tree):
            found = {}
            for name, content in tree.items():
                decoded = _decode_text(name)
                key = decoded if decode else name
                if search(r, decoded, IGNORECASE):
                    found[key] = content if isinstance(content, int) else {}
                if isinstance(content, dict):
                    nested = collect(content)
                    if nested:
                        found[key] = nested
            return found

        return collect(self.files)

    def __repr__(self):
        return str(self.__dict__)

    def __hash__(self):
        # NOTE(review): __eq__ is not overridden, so two Torrent objects with
        # the same sha1 hash alike but still compare unequal, and the hash
        # changes once an empty object has been parsed.
        if self.sha1:
            return int.from_bytes(self.sha1, byteorder="big")
        return id(self)


def glob(d):
    """Parse every ``*.torrent`` file directly inside directory *d*.

    Returns a dict mapping each torrent's sha1 digest to its Torrent object.
    Files that fail to parse are reported on stdout and skipped.
    """
    r = {}
    with scandir(d) as entries:
        for f in entries:
            try:
                if f.name.endswith(".torrent") and f.is_file():
                    t = Torrent()
                    t.file(f.path)
                    r[t.sha1] = t
            except Exception as e:
                # Deliberately broad: one broken file must not stop the scan.
                print(f"skipping broken torrent {f.name} due to exception:")
                print(e)
    return r