-
Notifications
You must be signed in to change notification settings - Fork 3
/
scrape-hnhh.php
52 lines (40 loc) · 1.3 KB
/
scrape-hnhh.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
<?php
// User-Agent header sent with the request; it presents the scraper as Googlebot.
$userAgent = "Googlebot/2.1 (http://www.googlebot.com/bot.html)";
// Page that is fetched and scanned for links.
$target_url = 'http://www.hotnewhiphop.com/';
/**
 * Build, print and optionally run the INSERT that records a link in `songs`.
 *
 * The statement is always echoed, followed by a newline. It is executed only
 * when $execute is true; the default keeps the script's existing print-only
 * behaviour, in which the query call was disabled.
 *
 * Uses mysqli because the mysql_* functions were removed in PHP 7.0.
 * Connection and query failures are reported as E_USER_WARNING and the
 * function returns without storing anything.
 *
 * @param string $url     Link to record in the `url` column.
 * @param bool   $execute Run the INSERT when true; only print it when false.
 * @return void
 */
function store_link($url, $execute = false) {
// Kept between calls so the script connects once instead of once per link.
static $db = null;

try {
if ($db === null) {
// NOTE(review): credentials are hard-coded in source; move them to configuration.
$link = mysqli_connect('localhost', 'devsquid_hhg', 'SAVAGERY99', 'devsquid_hhg');
if (!$link) {
trigger_error('store_link: MySQL connection failed: ' . mysqli_connect_error(), E_USER_WARNING);
return;
}
$db = $link;
}

// Human-readable form of the statement, printed on every call.
$query = sprintf("INSERT INTO songs (referral,url,date_added) VALUES ('hotnewhiphop', '%s', now())", mysqli_real_escape_string($db, $url));
echo $query, "\n";

if (!$execute) {
return;
}

// Execute with a bound parameter rather than the interpolated string above.
$stmt = mysqli_prepare($db, "INSERT INTO songs (referral,url,date_added) VALUES ('hotnewhiphop', ?, now())");
if ($stmt === false) {
trigger_error('store_link: INSERT failed: ' . mysqli_error($db), E_USER_WARNING);
return;
}
if (!mysqli_stmt_bind_param($stmt, 's', $url) || !mysqli_stmt_execute($stmt)) {
trigger_error('store_link: INSERT failed: ' . mysqli_stmt_error($stmt), E_USER_WARNING);
}
mysqli_stmt_close($stmt);
} catch (mysqli_sql_exception $e) {
// mysqli can be configured to throw instead of returning false.
trigger_error('store_link: ' . $e->getMessage(), E_USER_WARNING);
}
}
// Fetch $target_url with cURL and keep the response body in $html.
$ch = curl_init();
curl_setopt_array($ch, array(
CURLOPT_USERAGENT => $userAgent,
CURLOPT_URL => $target_url,
// Treat HTTP status codes >= 400 as a failed transfer.
CURLOPT_FAILONERROR => true,
CURLOPT_FOLLOWLOCATION => true,
// Set the Referer header automatically when following a redirect.
CURLOPT_AUTOREFERER => true,
// Make curl_exec() return the body instead of printing it.
CURLOPT_RETURNTRANSFER => true,
// Give up after 10 seconds.
CURLOPT_TIMEOUT => 10,
));
$html = curl_exec($ch);
// curl_exec() returns false only on failure; compare strictly so an empty
// body is not reported as "cURL error number:0".
if ($html === false) {
echo "<br />cURL error number:" .curl_errno($ch);
echo "<br />cURL error:" . curl_error($ch);
exit;
}
// Nothing to parse: stop here rather than hand an empty string to the parser.
if ($html === '') {
echo "<br />cURL error: empty response body";
exit;
}
// Parse the fetched markup into a DOMDocument. libxml parse errors are
// collected internally and discarded instead of being hidden with @, which
// would also hide unrelated errors.
$previousLibxmlMode = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlMode);

// Grab all the anchors inside <body> and store those whose href mentions limelinx.
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");
foreach ($hrefs as $href) {
$url = $href->getAttribute('href');
// strpos() returns 0 for a match at the very start of the string, which is
// falsy; compare against false so such links are not skipped.
if (strpos($url, 'limelinx') !== false) {
echo "\nLink stored: $url";
store_link($url);
}
}
?>