利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]
利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]
GITHUB: https://github.com/jash-git/CS_PHP_spider
可執行完整檔案: 下載
相關資料:
買賣超排行榜
https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZGK_D.djhtm
買超
https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_D.djhtm
https://tw.stock.yahoo.com/d/i/fgbuy_tse.html
賣超
https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_DA.djhtm
https://tw.stock.yahoo.com/d/i/fgsell_tse.html
每日股價
https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_3037.djhtm
https://tw.stock.yahoo.com/q/bc?s=3037
看來是這一篇
http://smart.businessweekly.com.tw/Magazine/detail.aspx?id=63709&s=Books
https://today.line.me/tw/v2/article/%E5%8F%B0%E8%82%A1%E6%8A%95%E8%B3%87%E5%BF%85%E7%9C%8B%E7%B1%8C%E7%A2%BC%E9%9D%A2%EF%BC%9A%E6%95%99%E4%BD%A0%E7%94%A8Excel%E8%A8%98%E9%8C%84%E8%B3%87%E9%87%91%E6%B5%81%E5%90%91%EF%BC%8C%E8%B7%9F%E8%91%97%E4%B8%BB%E5%8A%9B%E8%B5%B0%E4%B8%80%E8%B7%AF%E8%B3%BA%EF%BC%81-zgw1Bj
PHP
<?php
// The crawl renders up to 60 pages with a 10 second pause each, so lift the
// default execution time limit.
set_time_limit(0);

/**
 * Download $url with cURL and save the raw response to $path.
 *
 * CURLOPT_HEADER is on, so the saved file starts with the HTTP response
 * headers followed by the body. Certificate and host verification are
 * switched off, exactly as in the original script.
 *
 * @param string $url  Page to download.
 * @param string $path File that receives the response.
 */
function fetch_page_to_file($url, $path)
{
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, 1);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // NOTE: when the site is addressed by IP, a Host header can be supplied
    // through CURLOPT_HTTPHEADER.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false); // skip the certificate check
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false); // skip the host name check
    $data = curl_exec($curl);
    curl_close($curl);

    if ($data === false) {
        // The transfer failed: store an empty page so later stages simply
        // find no rows instead of receiving a boolean.
        $data = '';
    }

    $myfile = fopen($path, "w") or die("Unable to open file!");
    fwrite($myfile, $data);
    fclose($myfile);
}

/**
 * Download the net-buy ranking page into buy.txt.
 */
function buy()
{
    fetch_page_to_file('https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_D.djhtm', "buy.txt");
}

/**
 * Download the net-sell ranking page into sell.txt.
 *
 * The original requested ZG_D.djhtm (the net-buy page) here as well, which
 * made the sell output a copy of the buy output; ZG_DA.djhtm is the
 * net-sell page.
 */
function sell()
{
    fetch_page_to_file('https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_DA.djhtm', "sell.txt");
}

/**
 * Render the price page of stock $ID through the external money.exe helper.
 *
 * The page URL is handed over in url.txt; the helper is expected to leave
 * the rendered HTML in money.txt.
 *
 * @param string $ID Stock code inserted into the page URL.
 */
function get_money_New($ID)
{
    $url = 'https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_'.$ID.'.djhtm';

    // Drop the previous result first; otherwise a failed render would leave
    // the prior stock's money.txt behind and it would be parsed for this one.
    if (file_exists('money.txt')) {
        unlink('money.txt');
    }

    $file = fopen("url.txt", "w");
    if ($file !== false) {
        fwrite($file, $url);
        fclose($file);
    }

    exec("money.exe");
    // Pause kept from the original script (presumably to throttle requests).
    sleep(10);
}

/**
 * Alternative renderer: generate a PhantomJS script (123.js) for stock $ID
 * and run it, redirecting the printed page content into money.txt.
 *
 * @param string $ID Stock code inserted into the page URL.
 */
function get_money($ID)
{
    $url = 'https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_'.$ID.'.djhtm';

    // Remove the artefacts of a previous run.
    foreach (array('123.js', 'github.png', 'money.txt') as $stale) {
        if (file_exists($stale)) {
            unlink($stale);
        }
    }

    $lines = array(
        "var page = require('webpage').create();",
        "var url = '".$url."';",
        "page.open(url, function (status) {",
        "page.render('github.png');",
        "console.log(page.content);",
        "phantom.exit();",
        "});",
    );

    $file_js = fopen("123.js", "w");
    if ($file_js !== false) {
        foreach ($lines as $line) {
            fwrite($file_js, $line."\r\n");
        }
        fclose($file_js);
    }

    system("phantomjs.exe 123.js > money.txt");
    sleep(10);
}

/**
 * Extract three values from a rendered price page.
 *
 * The first line containing the chart header markup is located, the markup
 * is removed, <span>/</span> tags are turned into commas, and the 4th, 6th
 * and 8th comma-separated fields are returned.
 *
 * @param string $filename Rendered HTML file (money.txt).
 * @return string "a,b,c," ; fields are empty when the file or the marker
 *                line is missing (the original returned NULL or raised
 *                undefined-offset notices in those cases).
 */
function parse_money($filename)
{
    $empty = ',,,';

    if (!file_exists($filename)) {
        return $empty;
    }

    $file = fopen($filename, "r");
    if ($file === false) {
        return $empty;
    }

    $buf = '<div class="AdapterK" id="SysJustWebGraphDIV" style="margin: 0px auto; width: 550px; min-height: 600px;" mcht="adp_1"><div tabindex="0" class="FundView opsView1 opsPC" style="width: 550px; min-height: 530px;"><div class="opsPoster" style="display: none;"></div><div class="opsView"><div class="opsHead opsWrap"><div class="opsNote" style="max-width: 430px;"><div class="notehead" style="display: inline-block;">';

    $Data = null;
    // Read one line at a time and stop at the first line carrying the marker.
    while (($str = fgets($file)) !== false) {
        if (strpos($str, $buf) !== false) {
            $str = str_replace($buf, "", $str);
            $str = str_replace("<span>", ",", $str);
            $str = str_replace("</span>", ",", $str);
            $Data = explode(",", $str);
            break;
        }
    }
    fclose($file);

    if ($Data === null) {
        return $empty;
    }

    $result = '';
    foreach (array(3, 5, 7) as $index) {
        $result .= (isset($Data[$index]) ? $Data[$index] : '').',';
    }
    return $result;
}

/**
 * Turn a downloaded ranking page into CSV-like rows.
 *
 * Every line holding a Link2Stk() stock link becomes one output row: the
 * stock code and link text, followed by the values parse_money() extracts
 * from that stock's rendered price page.
 *
 * @param string $filename Downloaded ranking page.
 * @param string $outname  Output file.
 * @param int    $limit    Maximum number of stocks to process.
 */
function split_ranking_file($filename, $outname, $limit = 30)
{
    if (!file_exists($filename)) {
        return;
    }

    $file = fopen($filename, "r");
    if ($file === false) {
        return;
    }
    $file_s = fopen($outname, "w");
    if ($file_s === false) {
        fclose($file);
        return;
    }

    $buf = '<td class="t3t1"> <a href="javascript:Link2Stk(';
    $count = 0;

    // fgets() yields one line per iteration until end of file.
    while ($count < $limit && ($str = fgets($file)) !== false) {
        if (strpos($str, $buf) === false) {
            continue;
        }
        $count++;

        $str = str_replace($buf."'", "", $str);
        $str = str_replace("')\">", ",", $str);
        $str = str_replace("</a></td>", ",", $str);
        // Strip the line ending whether the page uses CRLF or bare LF.
        $str = rtrim($str, "\r\n");

        $ID = explode(",", $str);
        get_money_New($ID[0]);
        $str .= ",".parse_money("money.txt");

        fwrite($file_s, $str."\r\n");
    }

    fclose($file);
    fclose($file_s);
}

/**
 * Build Data_buy.txt from the downloaded net-buy ranking page.
 *
 * @param string $filename Downloaded ranking page (buy.txt).
 */
function spilt_buy($filename)
{
    split_ranking_file($filename, "Data_buy.txt");
}

/**
 * Build Data_sell.txt from the downloaded net-sell ranking page.
 *
 * @param string $filename Downloaded ranking page (sell.txt).
 */
function spilt_sell($filename)
{
    split_ranking_file($filename, "Data_sell.txt");
}

buy();
sell();
spilt_buy("buy.txt");
spilt_sell("sell.txt");

// Remove the intermediate downloads; guard so a failed download does not
// raise a warning here.
foreach (array('buy.txt', 'sell.txt') as $tmp) {
    if (file_exists($tmp)) {
        unlink($tmp);
    }
}

echo 'donload finish...'
?>
C#
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace money
{
    /// <summary>
    /// Minimal page renderer: loads the URL found in url.txt into the
    /// embedded WebBrowser control, writes the resulting document HTML to
    /// money.txt and then closes itself.
    /// </summary>
    public partial class Form1 : Form
    {
        // Fallback address used when url.txt cannot be read.
        string m_StrUrl = "https://www.google.com";
        int count = 0;

        public Form1()
        {
            InitializeComponent();
            try
            {
                // "using" releases the file handle even if reading throws.
                using (StreamReader sr = new StreamReader("url.txt"))
                {
                    // The last line of the file wins.
                    while (!sr.EndOfStream)
                    {
                        m_StrUrl = sr.ReadLine();
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: keep the fallback URL when url.txt is
                // missing or unreadable.
            }
        }

        private void Form1_Load(object sender, EventArgs e)
        {
            webBrowser1.Url = new Uri(m_StrUrl);
            this.WindowState = FormWindowState.Minimized;
            // The timer is armed only once a document has finished loading.
            timer1.Enabled = false;
        }

        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            timer1.Enabled = true;
        }

        private void timer1_Tick(object sender, EventArgs e)
        {
            // Stop the timer first so a second tick cannot fire while the
            // file is being written or the form is closing.
            timer1.Enabled = false;

            HtmlDocument doc = webBrowser1.Document;
            if (doc != null && doc.Body != null && doc.Body.Parent != null)
            {
                File.WriteAllText("money.txt", doc.Body.Parent.OuterHtml, Encoding.GetEncoding(doc.Encoding));
            }

            // Always close, even when nothing could be captured, so the
            // process that launched this program is not left waiting.
            this.Close();
        }
    }
}
BAT
REM Remove the results of a previous run. Paths are quoted so the script
REM also works when the current directory contains spaces.
if exist "%cd%\root\Data_buy.txt" del "%cd%\root\Data_buy.txt"
if exist "%cd%\root\Data_sell.txt" del "%cd%\root\Data_sell.txt"
if exist "%cd%\Data_buy.csv" del "%cd%\Data_buy.csv"
if exist "%cd%\Data_sell.csv" del "%cd%\Data_sell.csv"

REM Start the local web server, request the crawler page (wget returns once
REM the PHP script has finished responding), then stop the server.
start usbwebserver.exe
wget.exe "http://localhost:8080/php_curl_https.php" -O wait.txt
taskkill /f /im usbwebserver.exe

REM Publish the crawler output as CSV files next to this script.
copy "%cd%\root\Data_buy.txt" "%cd%\Data_buy.csv"
copy "%cd%\root\Data_sell.txt" "%cd%\Data_sell.csv"

REM wait.txt only holds the page response; it is not needed afterwards.
if exist "%cd%\wait.txt" del "%cd%\wait.txt"
2 thoughts on “利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]”
BAT
刪除檔案
拷貝檔案
取得目前路徑
執行和關閉程式
使用WGET執行爬蟲程式
PHP
刪檔案
執行外部程式並等待 exec("money.exe");
一次讀一行檔案資料
CURL 連結 HTTPS 網站
寫檔
字串取代
字串搜尋/字串包含/字串判斷
檔案是否存在