利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]

利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]

利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]


GITHUB: https://github.com/jash-git/CS_PHP_spider


可執行完整檔案: 下載


相關資料:

    買賣超排行榜
        https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZGK_D.djhtm

    買超
        https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_D.djhtm
        https://tw.stock.yahoo.com/d/i/fgbuy_tse.html

    賣超
        https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_DA.djhtm
        https://tw.stock.yahoo.com/d/i/fgsell_tse.html

    每日股價
        https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_3037.djhtm
        https://tw.stock.yahoo.com/q/bc?s=3037

    看來是這一篇
    http://smart.businessweekly.com.tw/Magazine/detail.aspx?id=63709&s=Books

    https://today.line.me/tw/v2/article/%E5%8F%B0%E8%82%A1%E6%8A%95%E8%B3%87%E5%BF%85%E7%9C%8B%E7%B1%8C%E7%A2%BC%E9%9D%A2%EF%BC%9A%E6%95%99%E4%BD%A0%E7%94%A8Excel%E8%A8%98%E9%8C%84%E8%B3%87%E9%87%91%E6%B5%81%E5%90%91%EF%BC%8C%E8%B7%9F%E8%91%97%E4%B8%BB%E5%8A%9B%E8%B5%B0%E4%B8%80%E8%B7%AF%E8%B3%BA%EF%BC%81-zgw1Bj


PHP

<?php
	// Lift PHP's execution time limit (0 = unlimited): the crawl below sleeps
	// 10 seconds per stock, so a full run takes far longer than a default limit.
	set_time_limit(0);
	/**
	 * Download the net-buy ranking page and store the raw response in buy.txt.
	 *
	 * The response is written exactly as cURL returns it, HTTP headers included
	 * (CURLOPT_HEADER is on). TLS peer and host verification are both disabled.
	 * The script terminates with "Unable to open file!" when buy.txt cannot be
	 * opened for writing.
	 */
	function buy()
	{
		$handle = curl_init();
		curl_setopt_array($handle, array(
			CURLOPT_URL            => 'https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_D.djhtm',
			CURLOPT_HEADER         => 1,
			CURLOPT_RETURNTRANSFER => 1,
			// When addressing the server by IP, a Host header could be added here,
			// e.g. CURLOPT_HTTPHEADER => array('Host:baibu.com').
			CURLOPT_SSL_VERIFYPEER => false, // key point: skip the certificate check
			CURLOPT_SSL_VERIFYHOST => FALSE, // skip the host-name check
		));
		$response = curl_exec($handle);
		curl_close($handle);

		$out = fopen("buy.txt", "w") or die("Unable to open file!");
		fwrite($out, $response);
		fclose($out);
	}


	/**
	 * Download the net-sell ranking page and store the raw response in sell.txt.
	 *
	 * Fix: the previous version requested ZG_D.djhtm, the net-buy page that
	 * buy() already fetches, so sell.txt was a copy of buy.txt. The net-sell
	 * ranking lives at ZG_DA.djhtm.
	 *
	 * The response is written exactly as cURL returns it, HTTP headers included
	 * (CURLOPT_HEADER is on). TLS peer and host verification are both disabled.
	 * The script terminates with "Unable to open file!" when sell.txt cannot be
	 * opened for writing.
	 */
	function sell()
	{
		$url = 'https://fubon-ebrokerdj.fbs.com.tw/Z/ZG/ZG_DA.djhtm';
		$curl = curl_init();
		curl_setopt($curl, CURLOPT_URL, $url);
		curl_setopt($curl, CURLOPT_HEADER, 1);
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
		// When addressing the server by IP, a Host header could be added here,
		// e.g. curl_setopt($curl, CURLOPT_HTTPHEADER, array('Host:baibu.com'));
		curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false); // key point: skip the certificate check
		curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE); // skip the host-name check
		$data = curl_exec($curl);
		curl_close($curl);

		$myfile = fopen("sell.txt", "w") or die("Unable to open file!");
		fwrite($myfile, $data);
		fclose($myfile);
	}

	/**
	 * Render one stock's daily price page through the external money.exe.
	 *
	 * The page URL is handed over via url.txt; money.exe (the C# program shown
	 * in this article reads url.txt and writes money.txt) is then run and the
	 * function pauses 10 seconds before returning.
	 *
	 * Fix: any money.txt left by the previous stock is removed first, as
	 * get_money() already does. Without this, a failed render left the old
	 * snapshot in place and its prices were attributed to the current stock.
	 * The script now also stops with "Unable to open file!" when url.txt cannot
	 * be written, instead of running the renderer against a stale URL.
	 *
	 * @param string $ID Stock code inserted into the page URL.
	 */
	function get_money_New($ID)
	{
		$url = 'https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_'.$ID.'.djhtm';

		// Discard the previous snapshot so a failed render cannot be mistaken
		// for fresh data.
		if(file_exists('money.txt'))
		{
			unlink('money.txt');
		}

		$file = fopen("url.txt", "w") or die("Unable to open file!");
		fwrite($file,$url);
		fclose($file);

		exec("money.exe");
		sleep(10);
	}
	
	/**
	 * Render one stock's daily price page with PhantomJS.
	 *
	 * Leftovers of a previous run (123.js, github.png, money.txt) are deleted,
	 * a small PhantomJS script is generated as 123.js, and phantomjs.exe is run
	 * with its console output redirected to money.txt. The generated script
	 * opens the page, saves a screenshot as github.png and prints the page
	 * content. The function pauses 10 seconds before returning.
	 *
	 * @param string $ID Stock code inserted into the page URL.
	 */
	function get_money($ID)
	{
		$url = 'https://fubon-ebrokerdj.fbs.com.tw/z/zc/zcw/zcw1_'.$ID.'.djhtm';

		// Remove the artefacts of the previous render, if any.
		foreach (array('123.js', 'github.png', 'money.txt') as $stale)
		{
			if(file_exists($stale))
			{
				unlink($stale);
			}
		}

		// PhantomJS driver script, one statement per line.
		$lines = array(
			"var page = require('webpage').create();",
			"var url = '".$url."';",
			"page.open(url, function (status) {",
			"page.render('github.png');",
			"console.log(page.content);",
			"phantom.exit();",
			"});",
		);

		$script = fopen("123.js", "w");
		fwrite($script, implode("\r\n", $lines)."\r\n");
		fclose($script);

		system("phantomjs.exe 123.js > money.txt");
		sleep(10);
	}
	
	/**
	 * Extract three values from a rendered price page.
	 *
	 * The file is scanned line by line for the chart header markup. On the
	 * first matching line the marker is removed, every <span> / </span> is
	 * turned into a comma, and the comma-separated pieces at positions 3, 5
	 * and 7 are returned.
	 * NOTE(review): which figures those positions hold is not visible here;
	 * confirm against a real money.txt.
	 *
	 * Fix: the result is now always a string of three comma-terminated fields.
	 * A missing or unreadable file, a page without the marker, or a line with
	 * too few pieces yields empty fields (",,," when nothing is found) instead
	 * of null or undefined-index warnings, so every CSV row keeps the same
	 * number of columns.
	 *
	 * @param string $filename Path of the rendered page.
	 * @return string "v3,v5,v7," with empty strings for unavailable values.
	 */
	function parse_money($filename)
	{
		$empty = ',,,';

		if(!file_exists($filename))
		{
			return $empty;
		}

		$file = fopen($filename, "r");
		if($file === false)
		{
			return $empty;
		}

		// Markup that immediately precedes the values on the line of interest.
		$buf = '<div class="AdapterK" id="SysJustWebGraphDIV" style="margin: 0px auto; width: 550px; min-height: 600px;" mcht="adp_1"><div tabindex="0" class="FundView opsView1 opsPC" style="width: 550px; min-height: 530px;"><div class="opsPoster" style="display: none;"></div><div class="opsView"><div class="opsHead opsWrap"><div class="opsNote" style="max-width: 430px;"><div class="notehead" style="display: inline-block;">';

		// Read until the first line carrying the marker; fgets() returns false
		// at end of file, which also covers an empty file.
		$found = null;
		while(($str = fgets($file)) !== false)
		{
			if(strpos($str,$buf) !== false)
			{
				$found = $str;
				break;
			}
		}
		fclose($file);

		if($found === null)
		{
			return $empty;
		}

		// Turn the span-delimited values into a comma-separated list.
		$found = str_replace($buf,"",$found);
		$found = str_replace("<span>",",",$found);
		$found = str_replace("</span>",",",$found);
		$Data = explode(",",$found);

		$result = '';
		foreach(array(3, 5, 7) as $index)
		{
			$result .= (isset($Data[$index]) ? $Data[$index] : '').',';
		}
		return $result;
	}

	/**
	 * Shared worker behind spilt_buy() and spilt_sell().
	 *
	 * Scans a downloaded ranking page for stock rows, i.e. lines containing the
	 * Link2Stk(...) anchor. For each row the markup is reduced to
	 * "<code>,<link text>,", the stock's price page is rendered with
	 * get_money_New() and the values from parse_money("money.txt") are appended.
	 * Rows are written to $outname with CRLF line endings and processing stops
	 * after $limit rows.
	 *
	 * Nothing is written when $filename is missing or unreadable. The script
	 * stops with "Unable to open file!" when $outname cannot be created.
	 *
	 * @param string $filename Downloaded ranking page to read.
	 * @param string $outname  Result file to (re)create.
	 * @param int    $limit    Maximum number of stock rows to process.
	 */
	function spilt_rank_file($filename, $outname, $limit = 30)
	{
		if(!file_exists($filename))
		{
			return;
		}

		$file = fopen($filename, "r");
		if($file === false)
		{
			return;
		}
		// Opened only once the input is known to be readable, so no handle is
		// left dangling when the input cannot be opened.
		$file_s = fopen($outname, "w") or die("Unable to open file!");

		// Markup that opens a stock row, up to (not including) the quoted code.
		$buf = '<td class="t3t1">&nbsp;<a href="javascript:Link2Stk(';
		$count = 0;

		// fgets() hands back one line per call and false at end of file.
		while($count < $limit && ($str = fgets($file)) !== false)
		{
			if(strpos($str,$buf) === false)
			{
				continue;
			}
			$count++;

			$str = str_replace($buf."'","",$str);
			$str = str_replace("')\">",",",$str);
			$str = str_replace("</a></td>",",",$str);
			// Strip the line ending whether the page uses CRLF or bare LF;
			// a leftover newline would split the CSV row in two.
			$str = str_replace(array("\r","\n"),"",$str);
			$ID = explode(",",$str);

			get_money_New($ID[0]);
			$str .= ",".parse_money("money.txt");

			fwrite($file_s,$str."\r\n");
		}

		fclose($file);
		fclose($file_s);
	}

	/**
	 * Build Data_buy.txt from a downloaded net-buy ranking page.
	 *
	 * @param string $filename Downloaded ranking page (buy.txt in this script).
	 */
	function spilt_buy($filename)
	{
		spilt_rank_file($filename, "Data_buy.txt");
	}

	/**
	 * Build Data_sell.txt from a downloaded net-sell ranking page.
	 *
	 * @param string $filename Downloaded ranking page (sell.txt in this script).
	 */
	function spilt_sell($filename)
	{
		spilt_rank_file($filename, "Data_sell.txt");
	}
	
	// Step 1: download both ranking pages.
	buy();
	sell();

	// Step 2: turn each page into its Data_*.txt result file.
	spilt_buy("buy.txt");
	spilt_sell("sell.txt");

	// Step 3: the raw downloads are intermediates only; remove them.
	foreach (array('buy.txt', 'sell.txt') as $rawPage)
	{
		unlink($rawPage);
	}

	echo 'donload finish...';
?>


C#

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace money
{
    /// <summary>
    /// Minimal page renderer: loads the URL found in url.txt into the embedded
    /// WebBrowser control and writes the resulting document's HTML to money.txt
    /// before closing itself.
    /// </summary>
    public partial class Form1 : Form
    {
        // Page to render; this default is kept when url.txt yields no usable line.
        string m_StrUrl = "https://www.google.com";
        // Not referenced in this part of the class; kept because other parts of
        // this partial class are not visible here.
        int count = 0;

        /// <summary>
        /// Reads the target URL from url.txt in the working directory. When the
        /// file has several lines, the last non-blank one is used.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            try
            {
                // 'using' releases the file handle even if a read throws.
                using (StreamReader sr = new StreamReader("url.txt"))
                {
                    while (!sr.EndOfStream)
                    {
                        string line = sr.ReadLine();
                        // Skip blank lines: a trailing empty line would otherwise
                        // replace the URL and make 'new Uri' throw in Form1_Load.
                        if (!string.IsNullOrWhiteSpace(line))
                        {
                            m_StrUrl = line.Trim();
                        }
                    }
                }
            }
            catch
            {
                // Deliberately best effort: when url.txt is missing or unreadable
                // the default URL above is used.
            }

        }

        /// <summary>
        /// Starts navigation, minimizes the window and keeps the timer off until
        /// the document has loaded.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            webBrowser1.Url = new Uri(m_StrUrl);
            this.WindowState = FormWindowState.Minimized;
            timer1.Enabled = false;
        }

        /// <summary>
        /// Arms the timer once the browser reports a completed document.
        /// NOTE(review): the delay (timer1's interval, set outside this block)
        /// presumably gives page scripts time to finish rendering - confirm.
        /// </summary>
        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {

            timer1.Enabled=true;

        }

        /// <summary>
        /// Writes the current document's HTML to money.txt, using the encoding
        /// reported by the document, then closes the form.
        /// </summary>
        private void timer1_Tick(object sender, EventArgs e)
        {
            File.WriteAllText("money.txt", webBrowser1.Document.Body.Parent.OuterHtml, Encoding.GetEncoding(webBrowser1.Document.Encoding));
            this.Close();
        }
    }
}


BAT

rem Remove the results of any previous run. Paths are quoted so the script
rem also works when the current directory contains spaces.
del "%cd%\root\Data_buy.txt"
del "%cd%\root\Data_sell.txt"
del "%cd%\Data_buy.csv"
del "%cd%\Data_sell.csv"

rem Start the local web server, request the PHP crawler page (wget returns
rem once the response has been received and saved to wait.txt), then stop
rem the server again.
start usbwebserver.exe
wget.exe "http://localhost:8080/php_curl_https.php" -O wait.txt
taskkill /f /im usbwebserver.exe

rem Publish the results next to this script as .csv and drop the placeholder
rem download.
copy "%cd%\root\Data_buy.txt" "%cd%\Data_buy.csv"
copy "%cd%\root\Data_sell.txt" "%cd%\Data_sell.csv"
del "%cd%\wait.txt"


2 thoughts on “利用PHP+C#製作一個Ajax網頁渲染器 撰寫簡易台股外資買賣超資料爬蟲程式[CS_PHP_spider]”

  1. BAT
    刪除檔案
    拷貝檔案
    取得目前路徑
    執行和關閉程式
    使用WGET執行爬蟲程式

  2. PHP
    刪檔案
    執行外部程式並等待 exec("money.exe");
    一次讀一行檔案資料
    cURL 連結 HTTPS 網站
    寫檔
    字串取代
    字串搜尋/字串包含/字串判斷
    檔案是否存在

發表迴響

你的電子郵件位址並不會被公開。 必要欄位標記為 *