利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]
利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]
GITHUB: https://github.com/jash-git/CS_PHP_spider
可執行完整檔案: 下載
相關資料:
買賣超排行榜
https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZGK_D.djhtm
買超
https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_D.djhtm
https://tw.stock.yahoo.com/d/i/fgbuy_tse.html
賣超
https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_DA.djhtm
https://tw.stock.yahoo.com/d/i/fgsell_tse.html
每日股價
https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_3037.djhtm
https://tw.stock.yahoo.com/q/bc?s=3037
看來是這一篇
http://smart.businessweekly.com.tw/Magazine/detail.aspx?id=63709&s=Books
https://today.line.me/tw/v2/article/%E5%8F%B0%E8%82%A1%E6%8A%95%E8%B3%87%E5%BF%85%E7%9C%8B%E7%B1%8C%E7%A2%BC%E9%9D%A2%EF%BC%9A%E6%95%99%E4%BD%A0%E7%94%A8Excel%E8%A8%98%E9%8C%84%E8%B3%87%E9%87%91%E6%B5%81%E5%90%91%EF%BC%8C%E8%B7%9F%E8%91%97%E4%B8%BB%E5%8A%9B%E8%B5%B0%E4%B8%80%E8%B7%AF%E8%B3%BA%EF%BC%81-zgw1Bj
PHP
<?php
// Lift PHP's execution time limit (0 = never time out). The crawl sleeps
// 10 seconds per stock for up to 30 + 30 stocks, far beyond the default limit.
set_time_limit(0);
/**
 * Download the net-buy ranking page and store the raw response
 * (HTTP headers included) in buy.txt in the current working directory.
 *
 * Terminates the script with "Unable to open file!" when buy.txt cannot be
 * opened for writing.
 */
function buy()
{
    $options = array(
        CURLOPT_URL            => 'https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_D.djhtm',
        CURLOPT_HEADER         => 1,      // keep the response headers in the output
        CURLOPT_RETURNTRANSFER => 1,      // return the body instead of printing it
        CURLOPT_SSL_VERIFYPEER => false,  // skip the SSL certificate check
        CURLOPT_SSL_VERIFYHOST => FALSE,  // skip the host-name check
    );

    $handle = curl_init();
    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }
    $response = curl_exec($handle);
    curl_close($handle);

    $out = fopen("buy.txt", "w") or die("Unable to open file!");
    fwrite($out, $response);
    fclose($out);
}
/**
 * Download the net-sell ranking page and store the raw response
 * (HTTP headers included) in sell.txt in the current working directory.
 *
 * Terminates the script with "Unable to open file!" when sell.txt cannot be
 * opened for writing.
 */
function sell()
{
    // ZG_DA is the net-sell ranking; ZG_D (used by buy()) is the net-buy one.
    $url = 'https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_DA.djhtm';
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, 1);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false); // skip the SSL certificate check
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE); // skip the host-name check
    $data = curl_exec($curl);
    curl_close($curl);
    $myfile = fopen("sell.txt", "w") or die("Unable to open file!");
    fwrite($myfile, $data);
    fclose($myfile);
}
/**
 * Render the quote page of one stock through the external money.exe helper.
 *
 * Writes the page URL to url.txt, runs money.exe and then waits 10 seconds.
 * NOTE(review): money.exe is presumably the C# WebBrowser renderer that reads
 * url.txt and writes money.txt - confirm against the deployed binary.
 *
 * @param string $ID Stock code inserted into the quote-page URL.
 */
function get_money_New($ID)
{
    $target = "https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_{$ID}.djhtm";

    $handle = fopen("url.txt", "w");
    fwrite($handle, $target);
    fclose($handle);

    exec("money.exe");
    sleep(10);
}
/**
 * Render the quote page of one stock with PhantomJS.
 *
 * Deletes any previous 123.js, github.png and money.txt, generates a PhantomJS
 * script (123.js) that opens the page, saves a screenshot to github.png and
 * prints the rendered HTML, then runs phantomjs.exe with its output redirected
 * to money.txt and waits 10 seconds.
 *
 * @param string $ID Stock code inserted into the quote-page URL.
 */
function get_money($ID)
{
    $url = "https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_{$ID}.djhtm";

    // Remove leftovers from a previous run.
    foreach (array('123.js', 'github.png', 'money.txt') as $stale) {
        if (file_exists($stale)) {
            unlink($stale);
        }
    }

    // The PhantomJS script, one statement per line.
    $script = array(
        "var page = require('webpage').create();",
        "var url = '".$url."';",
        "page.open(url, function (status) {",
        "page.render('github.png');",
        "console.log(page.content);",
        "phantom.exit();",
        "});",
    );

    $file_js = fopen("123.js", "w");
    foreach ($script as $statement) {
        fwrite($file_js, $statement."\r\n");
    }
    fclose($file_js);

    system("phantomjs.exe 123.js > money.txt");
    sleep(10);
}
/**
 * Extract three fields from a rendered quote page saved on disk.
 *
 * Scans $filename line by line for the chart-header markup, removes it,
 * turns every <span> and </span> into a comma and returns the comma-separated
 * fields at indexes 3, 5 and 7.
 *
 * @param string $filename Path of the saved page (e.g. "money.txt").
 * @return string "f3,f5,f7," taken from the marker line; ",,," when no line
 *                contains the marker (a field that is absent is left empty);
 *                "" when the file does not exist or cannot be opened.
 */
function parse_money($filename)
{
    $marker = '<div class="AdapterK" id="SysJustWebGraphDIV" style="margin: 0px auto; width: 550px; min-height: 600px;" mcht="adp_1"><div tabindex="0" class="FundView opsView1 opsPC" style="width: 550px; min-height: 530px;"><div class="opsPoster" style="display: none;"></div><div class="opsView"><div class="opsHead opsWrap"><div class="opsNote" style="max-width: 430px;"><div class="notehead" style="display: inline-block;">';

    if (!file_exists($filename)) {
        return '';
    }
    $file = fopen($filename, "r");
    if ($file === false) {
        return '';
    }

    // Stays empty unless the marker line is found, so an unrelated last line
    // of the file is never mistaken for data.
    $fields = array();
    while (!feof($file)) {
        $line = fgets($file);
        if ($line === false) {
            break;
        }
        if (strpos($line, $marker) !== false) {
            $line = str_replace($marker, "", $line);
            $line = str_replace(array("<span>", "</span>"), ",", $line);
            $fields = explode(",", $line);
            break;
        }
    }
    fclose($file);

    $result = '';
    foreach (array(3, 5, 7) as $index) {
        $result .= (isset($fields[$index]) ? $fields[$index] : '') . ',';
    }
    return $result;
}
/**
 * Build Data_buy.txt from the saved net-buy ranking page.
 *
 * Each line of $filename that contains a Link2Stk(...) stock link is reduced
 * to "<id>,<link text>,". The stock's quote page is then rendered through
 * get_money_New() and the fields returned by parse_money("money.txt") are
 * appended before the row is written. Processing stops after 30 matching rows.
 *
 * @param string $filename Path of the saved ranking page (e.g. "buy.txt").
 */
function spilt_buy($filename)
{
    if (!file_exists($filename)) {
        return;
    }

    $file = fopen($filename, "r");
    $file_s = fopen("Data_buy.txt", "w");
    if ($file === false || $file_s === false) {
        // Release whichever handle did open instead of leaking it.
        if ($file !== false) {
            fclose($file);
        }
        if ($file_s !== false) {
            fclose($file_s);
        }
        return;
    }

    $buf = '<td class="t3t1"> <a href="javascript:Link2Stk(';
    $count = 0;
    // fgets() returns one line per call until the end of the file.
    while (!feof($file)) {
        $str = fgets($file);
        if ($str === false || strpos($str, $buf) === false) {
            continue;
        }
        $count++;
        $str = str_replace($buf."'", "", $str);
        $str = str_replace("')\">", ",", $str);
        $str = str_replace("</a></td>", ",", $str);
        // Strip the line ending whether the page uses CRLF or bare LF, so no
        // newline ends up inside the output row.
        $str = rtrim($str, "\r\n");
        $ID = explode(",", $str);
        get_money_New($ID[0]);
        $str .= ",".parse_money("money.txt");
        fwrite($file_s, $str."\r\n");
        if ($count > 29) {
            break;
        }
    }
    fclose($file);
    fclose($file_s);
}
/**
 * Build Data_sell.txt from the saved net-sell ranking page.
 *
 * Each line of $filename that contains a Link2Stk(...) stock link is reduced
 * to "<id>,<link text>,". The stock's quote page is then rendered through
 * get_money_New() and the fields returned by parse_money("money.txt") are
 * appended before the row is written. Processing stops after 30 matching rows.
 *
 * @param string $filename Path of the saved ranking page (e.g. "sell.txt").
 */
function spilt_sell($filename)
{
    if (!file_exists($filename)) {
        return;
    }

    $file = fopen($filename, "r");
    $file_s = fopen("Data_sell.txt", "w");
    if ($file === false || $file_s === false) {
        // Release whichever handle did open instead of leaking it.
        if ($file !== false) {
            fclose($file);
        }
        if ($file_s !== false) {
            fclose($file_s);
        }
        return;
    }

    $buf = '<td class="t3t1"> <a href="javascript:Link2Stk(';
    $count = 0;
    // fgets() returns one line per call until the end of the file.
    while (!feof($file)) {
        $str = fgets($file);
        if ($str === false || strpos($str, $buf) === false) {
            continue;
        }
        $count++;
        $str = str_replace($buf."'", "", $str);
        $str = str_replace("')\">", ",", $str);
        $str = str_replace("</a></td>", ",", $str);
        // Strip the line ending whether the page uses CRLF or bare LF, so no
        // newline ends up inside the output row.
        $str = rtrim($str, "\r\n");
        $ID = explode(",", $str);
        get_money_New($ID[0]);
        $str .= ",".parse_money("money.txt");
        fwrite($file_s, $str."\r\n");
        if ($count > 29) {
            break;
        }
    }
    fclose($file);
    fclose($file_s);
}
// Script entry point: download both ranking pages, then turn each one into
// its Data_*.txt file (one rendered quote-page lookup per listed stock).
buy();
sell();
spilt_buy("buy.txt");
spilt_sell("sell.txt");
// The raw downloads are only intermediate files; remove them once parsed.
unlink('buy.txt');
unlink('sell.txt');
// Completion message returned to the HTTP client; the closing tag below
// terminates this statement, so no semicolon is required.
echo 'donload finish...'
?>
C#
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace money
{
    /// <summary>
    /// Minimal page renderer: loads the URL found in url.txt into the embedded
    /// WebBrowser control, waits for the document to complete, then dumps the
    /// rendered HTML to money.txt and closes itself.
    /// </summary>
    public partial class Form1 : Form
    {
        // Fallback page used when url.txt is missing, unreadable or empty.
        string m_StrUrl = "https://www.google.com";
        int count = 0;

        /// <summary>
        /// Reads the target URL from url.txt; the last non-blank line wins.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            try
            {
                // "using" releases the file handle even if reading fails.
                using (StreamReader sr = new StreamReader("url.txt"))
                {
                    while (!sr.EndOfStream)
                    {
                        string line = sr.ReadLine();
                        // Skip blank lines so a trailing newline cannot replace
                        // the URL with an empty string, which Uri would reject.
                        if (!string.IsNullOrWhiteSpace(line))
                        {
                            m_StrUrl = line.Trim();
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: without a readable url.txt the
                // default URL above is kept.
            }
        }

        /// <summary>
        /// Starts navigation, minimizes the window and keeps the dump timer
        /// off until the document has completed.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            webBrowser1.Url = new Uri(m_StrUrl);
            this.WindowState = FormWindowState.Minimized;
            timer1.Enabled = false;
        }

        /// <summary>
        /// Arms the timer once the browser reports a completed document.
        /// </summary>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            timer1.Enabled = true;
        }

        /// <summary>
        /// Writes the rendered document to money.txt, using the document's own
        /// encoding, and closes the form.
        /// </summary>
        private void timer1_Tick(object sender, EventArgs e)
        {
            // Stop the timer first so a second tick cannot run while the file
            // is being written or the form is closing.
            timer1.Enabled = false;
            try
            {
                HtmlDocument doc = webBrowser1.Document;
                if (doc != null && doc.Body != null && doc.Body.Parent != null)
                {
                    File.WriteAllText("money.txt", doc.Body.Parent.OuterHtml, Encoding.GetEncoding(doc.Encoding));
                }
            }
            finally
            {
                // Close the form even when the dump fails.
                this.Close();
            }
        }
    }
}
BAT
REM Remove the output left over from a previous run.
del "%cd%\root\Data_buy.txt"
del "%cd%\root\Data_sell.txt"
del "%cd%\Data_buy.csv"
del "%cd%\Data_sell.csv"

REM Start the local web server, then run the PHP crawler by requesting it
REM over HTTP; the response is saved to wait.txt.
start usbwebserver.exe
wget.exe "http://localhost:8080/php_curl_https.php" -O wait.txt

REM The crawl is done: stop the web server.
taskkill /f /im usbwebserver.exe

REM Copy the results next to this script as CSV files and clean up.
copy "%cd%\root\Data_buy.txt" "%cd%\Data_buy.csv"
copy "%cd%\root\Data_sell.txt" "%cd%\Data_sell.csv"
del "%cd%\wait.txt"
3 thoughts on “利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]”
BAT
刪除檔案
拷貝檔案
取得目前路徑
執行和關閉程式
使用WGET執行爬蟲程式
PHP
刪檔案
執行外部程式並等待 exec("money.exe");
一次讀一行檔案資料
CURL 連結 HTTPS 網站
寫檔
字串取代
字串搜尋/字串包含/字串判斷
檔案是否存在
PHP SERVER 超時設定/ TimeOut/Time Out/不過期
set_time_limit(0);//修改PHP的執行時間上限,0表示永遠不過期 [破解預設30S的限制]