summaryrefslogblamecommitdiffstats
path: root/prog/sear.php/index.php
blob: 2e695c53df9f4d53e518906fbc7d5932eafce5d7 (plain) (tree)



















































                                                                                                                                                                                                              
                                                                                          



                                                                                                     
                                                                                       


                               

                                                             
































                                                                                                    
                                                                                              
                                             


                                                                                       



                                                                                        












                                                                                                                                                                          
                                                                                                                    
 
                                                                                                                       


















                                                                                                 
                                                         
                                                                                          

                                                                          
                                                                                                                  












                                                                                                                        

                                                                                                                                                                               
                             
                                   














































                                                                                                                     












                                                                                                                        














                                                                                                                   
                                                                                                                                                  
                                                                              
                                                                                                                        
 















                                                                   

                                       
                                                                                       



                                                                  







                                                   

                                                              

                         

                                        















                                                            
                                                         


                                         



















                                                                 
                                                                                                                                                                                                   
 
                                                 

                                                                                                                                        











                                                                                                                   
                                                        



                                                                                                                                                                                                                     
                                        

                                                                                                                                                                                                                                                                                                                                                                            
                                

                                                                                                                                        
                                                                                                         
  
<?php
/**
 * Finds the CSS class name attached to a given declaration block.
 *
 * Searches $haystack case-insensitively for $definition, then looks backwards
 * for the nearest "." and returns the run of alphanumeric characters that
 * follows it.
 *
 * @param string $haystack   Text to search (here: the raw response document).
 * @param string $definition Declaration text to locate, e.g. "{color:#1967d2}".
 * @return string|false The class name, or false when $definition is not
 *                      present or no "." precedes it.
 */
function find_class ($haystack, $definition) {
	$pos = stripos($haystack, $definition);
	if ($pos === false)
		return false;
	// Look for the closest "." at or before the match. Without this bounded
	// search a match with no preceding dot would walk off the start of the
	// string and never terminate.
	$dot = strrpos(substr($haystack, 0, $pos + 1), ".");
	if ($dot === false)
		return false;
	$start = $dot + 1;
	$length = strlen($haystack);
	$endofclass = $start;
	// Stop at the end of the string instead of reading past it.
	while ($endofclass < $length && ctype_alnum($haystack[$endofclass]))
		$endofclass++;
	return substr($haystack, $start, $endofclass - $start);
}
/**
 * Unwraps a redirect link into the URL it points at.
 *
 * Two wrappers are recognised: a leading "/url?q=" and an embedded
 * "googleweblight.com/fp?u=". In both cases the target is the value up to the
 * next "&" (or the end of the string), URL-decoded. Anything else is returned
 * unchanged.
 *
 * @param string $h Link as found in an href attribute.
 * @return string The unwrapped URL.
 */
function fix_url ($h) {
	$h = (string) $h;
	$prefix = "/url?q=";
	if (str_starts_with($h, $prefix)) {
		// explode() keeps the whole remainder when there is no "&"; the
		// previous strpos()-based cut produced an empty string in that case.
		$h = urldecode(explode("&", substr($h, strlen($prefix)), 2)[0]);
	}
	$marker = "googleweblight.com/fp?u=";
	$at = strpos($h, $marker);
	if ($at !== false) {
		$h = urldecode(explode("&", substr($h, $at + strlen($marker)), 2)[0]);
	}
	return $h;
}
/**
 * Concatenates the values of a node's direct text-node children.
 *
 * Text nested inside child elements is not included.
 *
 * @param DOMNode $node Node whose immediate children are inspected.
 * @return string The joined text, or "" when there are no text children.
 */
function only_text_content ($node) {
	$pieces = [];
	foreach ($node->childNodes as $child) {
		if ($child->nodeType != XML_TEXT_NODE)
			continue;
		$pieces[] = $child->nodeValue;
	}
	return implode("", $pieces);
}
/**
 * Runs one search against the WAP search endpoint and scrapes the result page.
 *
 * The CSS class names used to recognise result elements are not fixed; they
 * are discovered by searching the raw response for known style declarations
 * (see find_class()).
 *
 * @param string       $s          Search query.
 * @param bool         $image      Request image results instead of web results.
 * @param bool         $noredirect Add nfpr=1 to the request.
 * @param string|false $bindstring Local "address:port" to bind the socket to,
 *                                 or false to use the default.
 * @return array On failure: ["status" => false, "code" => "failed"|"captcha"|
 *               "noimgclass"|"noclass"] plus "url" for every code but "failed".
 *               On success: ["status" => true, "query", "suggestion",
 *               "results", "url"]. Web results carry url/title/breadcrumbs/
 *               description; image results carry imgsrc/url.
 */
function query_google ($s, $image = false, $noredirect = false, $bindstring = false) {
	$us = urlencode($s);
	$extra = "";
	if ($image)
		$extra .= "&tbm=isch";
	if ($noredirect)
		$extra .= "&nfpr=1";
	$url = "http://wap.google.com/search?q=$us&num=100&ie=UTF-8$extra";
	$sockarr = [];
	if ($bindstring)
		$sockarr["bindto"] = $bindstring;
	$txtdoc = file_get_contents($url, false, stream_context_create(["socket" => $sockarr, "http" => ["ignore_errors" => true, "header" => "User-Agent: Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)\r\n"]]));
	if ($txtdoc == false)
		return ["status" => false, "code" => "failed"];
	if (str_contains($txtdoc, "In the meantime, solving the above CAPTCHA will let you continue"))
		return ["status" => false, "code" => "captcha", "url" => $url];
	$resultsforclass = find_class($txtdoc, "{color:#1967d2}");
	$imageclass = false;
	$titleclass = false;
	$descclass = false;
	if ($image) {
		$imageclass = find_class($txtdoc, "{font-family:Roboto,Helvetica,Arial,sans-serif}");
		if (!$imageclass)
			return ["status" => false, "code" => "noimgclass", "url" => $url];
	} else {
		$titleclass = find_class($txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
		$descclass = find_class($txtdoc, "{word-break:break-word}");
		if (!$titleclass || !$descclass)
			return ["status" => false, "code" => "noclass", "url" => $url];
	}
	$results = [];
	$x = new DOMDocument();
	// NOTE(review): this converts the document to ISO-8859-1 before parsing;
	// characters outside Latin-1 cannot survive that conversion. Confirm this
	// is the intended way to feed the page to loadHTML().
	$txtdoc = mb_convert_encoding($txtdoc, "ISO-8859-1");
	$x->loadHTML($txtdoc);
	if ($image) {
		foreach ($x->getElementsByTagName("div") as $div) {
			if ($div->getAttribute("class") != $imageclass)
				continue;
			// A matching div without a link used to be a fatal null dereference.
			$anchor = $div->getElementsByTagName("a")->item(0);
			if (!$anchor)
				continue;
			$hreflink = $anchor->getAttribute("href");
			if (!$hreflink)
				continue;
			if (!preg_match(",.*s?imgurl=([^&]*)&imgrefurl=([^&]*).*,", $hreflink, $matches))
				continue;
			$imgurl = $matches[1];
			$imgrefurl = $matches[2];
			if (!$imgurl && !$imgrefurl)
				continue;
			$imgurl = urldecode(fix_url($imgurl));
			$imgrefurl = urldecode(fix_url($imgrefurl));
			$results[] = ["imgsrc" => $imgurl, "url" => $imgrefurl];
		}
	} else {
		foreach ($x->getElementsByTagName("a") as $a) {
			if (!$a->getAttribute("class"))
				continue;
			if (!str_contains($a->getAttribute("class"), $titleclass))
				continue;
			$hreflink = fix_url($a->getAttribute("href"));
			if (!$hreflink)
				continue;
			$firstspan = null;
			// Reset per result: previously this was undefined for the first
			// result and leaked the previous result's value into later ones.
			$breadcrumbs = "";
			foreach ($a->getElementsByTagName("span") as $span) {
				if ($firstspan == null)
					$firstspan = $span;
				if ($span->getAttribute("class") == $descclass)
					$breadcrumbs = $span->nodeValue;
			}
			// The title is the first span inside the link; fall back to the
			// link's own text when it contains no span at all.
			$title = $firstspan ? $firstspan->nodeValue : $a->nodeValue;
			// The description is looked up in the first table under the
			// link's grandparent.
			$container = $a->parentNode?->parentNode;
			$table = $container ? $container->getElementsByTagName("table")->item(0) : null;
			$description = false;
			if ($table) {
				foreach ($table->getElementsByTagName("span") as $span) {
					if ($span->getAttribute("class") != $descclass)
						continue;
					if (!$description)
						$description = $span->nodeValue;
				}
			}
			if (!$description)
				$description = "ni mi uspelo izluščiti opisa";
			$results[] = ["url" => $hreflink, "title" => $title, "breadcrumbs" => $breadcrumbs, "description" => $description];
		}
	}
	// Collect spelling links: every "&spell=1&" link, plus "&nfpr=1&" links
	// once at least one suggestion has already been seen.
	$suggested = [];
	if ($resultsforclass) {
		foreach ($x->getElementsByTagName("a") as $a) {
			if (!str_contains($a->getAttribute("class"), $resultsforclass))
				continue;
			$href = $a->getAttribute("href");
			if (str_contains($href, "&spell=1&") || (str_contains($href, "&nfpr=1&") && sizeof($suggested) > 0))
				$suggested[] = $a->nodeValue;
		}
	}
	// More than one link means the query was rewritten; report the rewritten
	// query. TODO: there is also an "including results for" style response
	// that is not handled here.
	if (sizeof($suggested) > 1 && !$noredirect)
		$s = $suggested[0];
	$suggestion = false;
	if (sizeof($suggested) == 1 || ($noredirect && sizeof($suggested) > 0))
		$suggestion = $suggested[0];
	return ["status" => true, "query" => $s, "suggestion" => $suggestion, "results" => $results, "url" => $url];
}
/**
 * Renders a complete HTML page around the search form.
 *
 * $title and $query are escaped here; $queryinfo, $body, $additionalform and
 * $add_footer are inserted verbatim and must already be safe HTML.
 *
 * @param string $title          Page title (plain text).
 * @param string $queryinfo      HTML shown in the heading under the form.
 * @param string $body           HTML page body.
 * @param string $query          Current query (plain text) for the search box.
 * @param string $additionalform Extra HTML (hidden inputs) placed inside the form.
 * @param bool   $imgfirst       Put the image-search button first.
 * @param string $add_footer     Extra HTML appended after the footer links.
 * @return string The page markup.
 */
function template ($title, $queryinfo, $body, $query = "", $additionalform = "", $imgfirst = false, $add_footer = "") {
	// The query lands inside a single-quoted attribute, so single quotes must
	// be escaped explicitly; the default flags only do that from PHP 8.1 on.
	$query = htmlspecialchars((string) $query, ENT_QUOTES | ENT_SUBSTITUTE);
	$title = htmlspecialchars((string) $title, ENT_QUOTES | ENT_SUBSTITUTE);
	$buttons = "
		<button type=submit value=🔍 ><span>=&gt;</span></button>
		<button accesskey=f type=submit name=f value=Ʊ ><span>1.</span></button>
		<button accesskey=i type=submit name=i value=🖼><span>[^]</span></button>
	";
	if ($imgfirst) {
		$buttons = "
			<button type=submit name=i value=🖼><span>[^]</span></button>
			<button accesskey=i type=submit value=🔍 ><span>=&gt;</span></button>
			<button accesskey=f type=submit name=f value=Ʊ ><span>1.</span></button>
		";
	}
	return "
	<!DOCTYPE html>
	<html lang=sl>
		<head>
			<meta charset=UTF-8>
			<title>$title :: sear.php</title>
			<meta name=viewport content='width=device-width, initial-scale=1'>
			<link rel=stylesheet href=/css.css>
			<link rel=icon type=image/x-icon href=favicon.ico>
			<link title=sear.php rel=search type=application/opensearchdescription+xml href=/osdd.xml>
		</head>
		<body>
			<form class=container action=.>
				<input accesskey=s type=text name=q value='$query' placeholder='sear.php ...' size=50 />
				$buttons
				$additionalform
			</form>
			<h3>
				$queryinfo
			</h3>
			$body
			<hr>
			<h4 align=center>
				<a href=//ni.šijanec.eu./sijanec/r/tree/prog/sear.php>sear.php</a>
				<a href=javascript:window.external.AddSearchProvider(window.location.origin+'/osdd.xml') id=r hidden=hidden ><!--registriraj v brskalnik--></a>
			</h4>
			$add_footer
			<script>
				if (typeof window.external.AddSearchProvider === 'function')
					document.getElementById('r').hidden = false;
			</script>
		</body>
	</html>
	";
}
/**
 * Renders a list of search results as HTML.
 *
 * Entries with an "imgsrc" key become linked images; all others become a
 * title link with breadcrumb and description.
 *
 * @param array     $results   Result entries as produced by query_google().
 * @param bool      $plaintext When true, a leading "https:" scheme in result
 *                             and image URLs is rewritten to "http:".
 * @param int|false $limit     Maximum number of results to render; a falsy
 *                             value renders all of them.
 * @return string HTML fragment.
 */
function results_html ($results, $plaintext, $limit) {
	// Only the scheme is rewritten. The previous str_replace(..., 1) call
	// passed a literal to the by-reference $count parameter, which throws.
	$link = static function ($url) use ($plaintext) {
		$url = (string) $url;
		if ($plaintext && str_starts_with($url, "https:"))
			$url = "http:" . substr($url, strlen("https:"));
		// URLs are emitted inside single-quoted attributes.
		return htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE);
	};
	$r = "";
	$i = 0;
	foreach ($results as $result) {
		if ($limit && $i == $limit)
			return $r;
		$safeurl = $link($result["url"] ?? "");
		if (isset($result["imgsrc"])) {
			$safeimgsrc = $link($result["imgsrc"]);
			$r .= "
				<a id=result$i class=result href='$safeurl'>
					<img src='$safeimgsrc' />
				</a>
			";
		} else {
			$safetitle = trim(htmlspecialchars((string) ($result["title"] ?? "")));
			$safebreadcrumb = htmlspecialchars((string) ($result["breadcrumbs"] ?? ""));
			$safedesc = htmlspecialchars((string) ($result["description"] ?? ""));
			$r .= "
				<div id=result$i class=result>
					<h4>
						<a href='$safeurl' accesskey=$i>$safetitle</a> 
						<span class=breadcrumb>
							$safebreadcrumb
						</span>
					</h4>
					<p>
						$safedesc
					</p>
				</div>
			";
		}
		$i++;
	}
	return $r;
}
/**
 * Picks the noun form that matches a count.
 *
 * The choice is made on $n modulo 100: 1 selects $ednina, 2 selects $dvojina,
 * 3 and 4 select $trojina, everything else selects $mno.
 *
 * @param int    $n       The count.
 * @param string $ednina  Form for a remainder of 1.
 * @param string $dvojina Form for a remainder of 2.
 * @param string $trojina Form for a remainder of 3 or 4.
 * @param string $mno     Form for every other remainder.
 * @return string The selected form.
 */
function samostalnik ($n, $ednina, $dvojina, $trojina, $mno) {
	$remainder = $n % 100;
	return match ($remainder) {
		1 => $ednina,
		2 => $dvojina,
		3, 4 => $trojina,
		default => $mno,
	};
}
/**
 * Turns a successful search response into the final HTTP response and ends
 * the script.
 *
 * Emits a "no results" page when the result list is empty, a redirect to the
 * first result when $horseshoe is set, and the rendered result page otherwise.
 * This function does not return.
 *
 * @param string    $query      Query as typed by the user.
 * @param array     $response   Response with "results", "query" and "suggestion".
 * @param int|false $limit      Passed through to results_html().
 * @param bool      $plaintext  Passed through to results_html().
 * @param bool      $horseshoe  Redirect straight to the first result.
 * @param string    $add_url    Extra query-string fragment for generated links.
 * @param string    $add_form   Extra hidden inputs for the search form.
 * @param bool      $image      Whether this was an image search.
 * @param string    $add_footer Extra footer HTML; optional so that callers
 *                              without debugging information can omit it.
 */
function handle_response ($query, $response, $limit, $plaintext, $horseshoe, $add_url, $add_form, $image, $add_footer = "") {
	$results = $response["results"] ?? [];
	if (!is_array($results) || sizeof($results) == 0)
		die(template($query, "ni rezultatov", "vaše iskanje ni obrodilo sadov.", $query, $add_form, $image, $add_footer));
	if ($horseshoe && !empty($results[0]["url"])) {
		header("Location: " . $results[0]["url"]);
		// Nothing else should be rendered once the redirect has been issued.
		exit;
	}
	$queryinfo = "";
	$effectivequery = $response["query"] ?? $query;
	if ($effectivequery != $query) {
		$safequeryurl = urlencode($query);
		$safequeryhtml = htmlspecialchars($query);
		$queryinfo .= "preusmeril sem vas iz <a href='?e=e$add_url&q=$safequeryurl'>$safequeryhtml</a> | ";
	}
	$suggestion = $response["suggestion"] ?? false;
	if ($suggestion != false) {
		$safequeryurl = urlencode($suggestion);
		$safequeryhtml = htmlspecialchars($suggestion);
		$queryinfo .= "predlagam iskanje <a href='?e=e$add_url&q=$safequeryurl'>$safequeryhtml</a> | ";
	}
	$count = sizeof($results);
	$queryinfo .= $count . " " . samostalnik($count, "zadetek", "zadetka", "zadetki", "zadetkov");
	$resultshtml = results_html($results, $plaintext, $limit);
	die(template($effectivequery, $queryinfo, $resultshtml, $effectivequery, $add_form, $image, $add_footer));
}
/**
 * Runs a search, retrying from each available local address until one works.
 *
 * The candidate bind strings come from the comma-separated BINDSTRINGS
 * environment variable when it is set, otherwise from the unicast addresses of
 * all network interfaces (skipping ::1). Candidates are tried in order while
 * the result is a "captcha" or "failed" error; any other outcome stops the
 * loop. If no candidate is available, a single attempt is made without
 * binding, recorded under the empty bind string.
 *
 * @param string $q     Search query.
 * @param bool   $image Image search.
 * @param bool   $exact Passed to query_google() as $noredirect.
 * @return array The last query_google() response, extended with "bindstring"
 *               (the one used last), "bindstrings" (all candidates) and
 *               "bindstring_timings" (nanoseconds per attempted candidate).
 */
function try_query_google ($q, $image, $exact) {
	$bindstrings = [];
	$override = getenv("BINDSTRINGS");
	if ($override) {
		$bindstrings = explode(",", $override);
	} else {
		// net_get_interfaces() returns false on failure.
		foreach (net_get_interfaces() ?: [] as $interface)
			foreach ($interface["unicast"] ?? [] as $unicast) {
				if (!isset($unicast["address"]))
					continue;
				$address = $unicast["address"];
				if ($address == "::1") // requests from here time out
					continue;
				// IPv6 literals need brackets in an address:port string.
				$bindstrings[] = str_contains($address, ":") ? "[$address]:0" : "$address:0";
			}
	}
	// Guarantee at least one attempt so that $response is always defined;
	// query_google() treats an empty bind string as "do not bind".
	if (!$bindstrings)
		$bindstrings = [""];
	$bindstring_timings = [];
	$response = ["status" => false, "code" => "failed"];
	foreach ($bindstrings as $bs) {
		$start_ns = hrtime(true);
		$response = query_google($q, $image, $exact, $bs);
		$bindstring_timings[$bs] = hrtime(true)-$start_ns;
		$response["bindstring"] = $bs;
		if ($response["status"] == true)
			break;
		// Only rate limiting and transport failures are worth retrying from
		// another address.
		if (!in_array($response["code"], ["captcha", "failed"], true))
			break;
	}
	$response["bindstrings"] = $bindstrings;
	$response["bindstring_timings"] = $bindstring_timings;
	return $response;
}
// Request handling. Recognised parameters:
//   q     query text            i  image search        e  exact (no spelling redirect)
//   json  emit raw JSON         l  result limit        h  rewrite https links to http
//   f     redirect to the first result                 debug  keep PHP errors visible
if (!isset($_REQUEST["debug"]))
	ini_set('display_errors','Off');
$add_form = "";
$add_url = "";
$q = null;
// Ignore array-valued input such as q[]=x; only a string is a usable query.
if (isset($_REQUEST["q"]) && is_string($_REQUEST["q"]))
	$q = $_REQUEST["q"];
$image = false;
if (!empty($_REQUEST["i"]))
	$image = true;
$exact = false;
if (!empty($_REQUEST["e"])) {
	$add_url .= "&e=e";
	$exact = true;
	$add_form .= "<input type=hidden name=e value=e />";
}
if (!empty($_REQUEST["json"])) {
	header("Content-Type: application/json");
	$response = try_query_google((string) $q, $image, $exact);
	if (($response["status"] ?? false) == false)
		http_response_code(500);
	die(json_encode($response));
}
$limit = false;
if (!empty($_REQUEST["l"]) && is_scalar($_REQUEST["l"])) {
	// int() is not a PHP function; cast instead. The cast also keeps the
	// value safe to embed in the markup below.
	$limit = (int) $_REQUEST["l"];
	if ($limit > 0) {
		$add_url .= "&l=$limit";
		$add_form .= "<input type=hidden name=l value=$limit />";
	} else {
		$limit = false;
	}
}
$plaintext = false;
if (!empty($_REQUEST["h"])) {
	$plaintext = true;
	$add_url .= "&h=h";
	$add_form .= "<input type=hidden name=h value=h />";
}
$horseshoe = false;
if (!empty($_REQUEST["f"])) {
	$horseshoe = true;
	$add_url .= "&f=f";
	$add_form .= "<input type=hidden name=f value=f />";
}
if ($q === null || $q === "") {
	die(template("", "iskalnik sear.php", "dobrodošli v iskalniku sear.php, nasledniku programa sear.c. za iskanje po spletu nekaj vnesite v iskalno vrstico zgoraj in pritisnite na gumb."));
}
$response = try_query_google($q, $image, $exact);
$used_bindstring = $response["bindstring"] ?? "";
$ms = ($response["bindstring_timings"][$used_bindstring] ?? 0)/(1000*1000);
$add_footer = "<hr>razhroščevanje: <a href='" . htmlspecialchars($response["url"] ?? "", ENT_QUOTES) . "'>" . htmlspecialchars((string) $used_bindstring) . "</a> v $ms ms";
if (($response["status"] ?? false) != true) {
	$code = $response["code"] ?? "failed";
	if ($code == "captcha") {
		foreach (["http://searc.oliwerix.com/sear.php?ref=b"] as $fallback) {
			// Build the fallback query string separately so the options in
			// $add_url (limit, plaintext, ...) survive for the rendered page.
			$fallback_args = "";
			if ($exact)
				$fallback_args .= "&e=e";
			$raw = file_get_contents($fallback . $fallback_args . "&q=" . urlencode($q));
			if ($raw === false)
				continue;
			$json = json_decode($raw, true);
			if (!isset($json["status"]))
				continue;
			if ($json["status"] == false) {
				$fallback_code = $json["code"] ?? "";
				if ($fallback_code == "captcha")
					continue;
				http_response_code(500);
				// The code comes from a remote server, so escape it.
				die(template("napaka", "napaka pri posredovanju zahteve", "ker je poizvedba vrnila captcho, sem jo posredoval na drug strežnik, ta pa je vrnil napako " . htmlspecialchars((string) $fallback_code) . ".", $q));
			}
			handle_response($q, $json, $limit, $plaintext, $horseshoe, $add_url, $add_form, $image, $add_footer);
		}
		http_response_code(500);
		die(template("napaka", "iskalno zaledje omejuje iskanja", "iskalno zaledje se je odzvalo s CAPTCHO, češ, da ta instanca prehitro pošilja zahteve. obenem se je enako zgodilo tudi vsem rezervnim strežnikom. iščete lahko ročno preko spletne strani zaledne storitve: <a href='" . htmlspecialchars($response["url"] ?? "", ENT_QUOTES) . "'>kliknite sem</a>.", $q));
	}
	http_response_code(500);
	die(template("napaka", "napaka pri poizvedbi", "poizvedba na iskalno zaledje je vrnila napako " . $code . ".", $q));
}
handle_response($q, $response, $limit, $plaintext, $horseshoe, $add_url, $add_form, $image, $add_footer);
?>