Commit 6ad20bc1 by Phạm Văn Đoan

Test lấy lượt nghe ở trang NCT

parent 33a823a7
<?php
namespace App\Console\Commands;
use App\Repositories\TrackRepository;
use Illuminate\Console\Command;
class CrawlerNctTrackListenCommand extends Command
{
    /**
     * Number of tracks requested per run when no valid {limit} argument is given.
     */
    const DEFAULT_LIMIT = 10;

    /**
     * The name and signature of the console command.
     *
     * The optional {limit} argument caps how many tracks are requested from
     * the repository in one run.
     *
     * @var string
     */
    protected $signature = 'crawler:get-nct-listen {limit?}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Lấy lượt nghe ở DOM từ trang của NCT';

    /**
     * Repository used to select tracks and fetch their NCT listen data.
     *
     * @var TrackRepository
     */
    protected $trackRepository;

    /**
     * Create a new command instance.
     *
     * @param TrackRepository $trackRepository
     */
    public function __construct(TrackRepository $trackRepository)
    {
        parent::__construct();
        // No ini_set() here: the constructor runs whenever Artisan instantiates
        // its registered commands, so a global setting changed at this point
        // would leak into unrelated commands. It is applied in handle() instead.
        $this->trackRepository = $trackRepository;
    }

    /**
     * Execute the console command.
     *
     * Prints how many tracks the repository reports as already crawled, how
     * many are selected for this run, and dumps whatever getNctTrackListen()
     * returns for them.
     *
     * @return int Process exit code (0 on success).
     */
    public function handle()
    {
        // Lift the execution time limit only for this command's run;
        // 0 is the documented "no limit" value.
        ini_set('max_execution_time', '0');

        $from_page = 'nct';
        $limit = $this->resolveLimit();

        // NOTE(review): presumably the tracks already crawled today for this
        // source (per the message printed below) - confirm in TrackRepository.
        $crawlered = $this->trackRepository->getCrawlerListenInserted($from_page, false);
        echo "\n Tổng số bài hát đã lấy lượt nghe hôm nay: " . count($crawlered);

        // Get the list of tracks for this music source, passing along the
        // already-crawled set and the per-run limit.
        $inserted = $this->trackRepository->getTrackBySource($from_page, $crawlered, $limit);
        $total = count($inserted);
        echo "\n Tổng số bài hát cần lấy lượt nghe: " . $total;

        $data = $this->trackRepository->getNctTrackListen($inserted);
        print_r($data);

        // Return instead of die() so the framework can finish the command
        // normally and report the exit code itself.
        return 0;
    }

    /**
     * Resolve the per-run track limit from the optional {limit} argument.
     *
     * Falls back to DEFAULT_LIMIT when the argument is missing or is not a
     * positive integer.
     *
     * @return int
     */
    private function resolveLimit()
    {
        $raw = $this->argument('limit');

        if ($raw === null) {
            return self::DEFAULT_LIMIT;
        }

        if (!ctype_digit((string) $raw) || (int) $raw < 1) {
            $this->warn('Invalid limit "' . $raw . '", falling back to ' . self::DEFAULT_LIMIT . '.');

            return self::DEFAULT_LIMIT;
        }

        return (int) $raw;
    }
}
......@@ -4,6 +4,7 @@ namespace App\Console;
use App\Console\Commands\CalculateDailyListenCommand;
use App\Console\Commands\CalculateReportCommand;
use App\Console\Commands\CrawlerNctTrackListenCommand;
use App\Console\Commands\CrawlerTrackListenCommand;
use App\Console\Commands\CrontjobCommand;
use App\Console\Commands\ExportRankingCommand;
......@@ -49,7 +50,9 @@ class Kernel extends ConsoleKernel
ExportRankingCommand::class,
SendWarningCommand::class
SendWarningCommand::class,
CrawlerNctTrackListenCommand::class,
];
......
......@@ -182,6 +182,35 @@ if (!function_exists('cURL')) {
}
}
if (!function_exists('cURLFromNct')) {
    /**
     * Perform a browser-like GET request and return the response body.
     *
     * Sends a fixed set of desktop-Chrome request headers (including an
     * "Authority: www.nhaccuatui.com" header) with the request.
     *
     * @param string $url Absolute URL to fetch.
     * @return string|false Response body, or false when the cURL handle
     *                      cannot be created or the transfer fails.
     */
    function cURLFromNct($url) {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
        // An empty string makes libcurl advertise (and transparently decode)
        // exactly the encodings it was built with. Hard-coding "br" could
        // request brotli from a libcurl build that cannot decode it.
        curl_setopt($ch, CURLOPT_ENCODING, '');

        // No manual Accept-Encoding header: libcurl generates it from the
        // option above, and a hand-written one would override that.
        $headers = array(
            'Authority: www.nhaccuatui.com',
            'Cache-Control: max-age=0',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language: vi,vi-VN;q=0.9,en-US;q=0.8,en;q=0.7',
        );
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $result = curl_exec($ch);
        $failed = curl_errno($ch) !== 0;

        // Always release the handle, including on the error path.
        curl_close($ch);

        return $failed ? false : $result;
    }
}
if (!function_exists('getZingUrlGetCounter')) {
/**
* Hàm tạo URL cho từng bài hát dựa vào ID chuỗi của nó để lấy lượt nghe
......
......@@ -435,5 +435,38 @@ class TrackRepository extends BaseRepository
return $tracks;
}
/**
 * Fetch listen-count data from NCT (nhaccuatui.com).
 *
 * NOTE(review): work in progress. The first statement returns unconditionally
 * with the raw response of the NCT counter endpoint for a single hard-coded
 * song id, so $inserted is currently ignored and every statement after that
 * return is unreachable.
 *
 * @param mixed $inserted Tracks to look up. The unreachable loop below reads
 *                        $track['link_nct'] and $track['id'] from each element
 *                        - presumably the rows produced by getTrackBySource();
 *                        verify against the caller.
 * @return string|false Raw body returned by file_get_contents(), or false if
 *                      the request fails.
 */
public function getNctTrackListen($inserted)
{
// Temporary probe: request the counter endpoint for one fixed song id and
// hand the raw response straight back to the caller.
return file_get_contents('https://www.nhaccuatui.com/interaction/api/counter?listSongIds=6217224');
// Unreachable from here on (see the return above).
$tracks = [];
// Intended per-track crawl: load each track's NCT page.
if (count($inserted) > 0) {
foreach ($inserted as $key => $track) {
$url = $track['link_nct'];
//$dom = new Htmldom($url);
// The page content is fetched but not used yet; the parsing step is still commented out.
$dom = file_get_contents($url);
//print_r($dom);
//sleep(5);
/* Get the track title -------------------------------------------------------------------------------------- */
/*$element_track_title = 'div.show_listen span';
foreach ($dom->find($element_track_title) as $data) {
$tracks[$track['id']] = str_replace([], [], $data->innertext);
}*/
}
return $tracks;
} else {
return null;
}
}
}
}
\ No newline at end of file
......@@ -169,6 +169,7 @@ return [
Laravel\Tinker\TinkerServiceProvider::class,
Yangqi\Htmldom\HtmldomServiceProvider::class,
Maatwebsite\Excel\ExcelServiceProvider::class,
Ixudra\Curl\CurlServiceProvider::class,
/*
* Application Service Providers...
......@@ -229,6 +230,7 @@ return [
'View' => Illuminate\Support\Facades\View::class,
'Htmldom' => Yangqi\Htmldom\Htmldom::class,
'Excel' => Maatwebsite\Excel\Facades\Excel::class,
'Curl' => Ixudra\Curl\Facades\Curl::class,
],
......
......@@ -9,7 +9,7 @@
*/
class Htmldomnode
{
public $nodetype = HDOM_TYPE_TEXT;
public $nodetype = YANGQI_HDOM_TYPE_TEXT;
public $tag = 'text';
public $attr = array();
public $children = array();
......@@ -110,9 +110,9 @@ class Htmldomnode
}
$string .= " HDOM_INNER_INFO: '";
if (isset($node->_[HDOM_INFO_INNER]))
if (isset($node->_[YANGQI_HDOM_INFO_INNER]))
{
$string .= $node->_[HDOM_INFO_INNER] . "'";
$string .= $node->_[YANGQI_HDOM_INFO_INNER] . "'";
}
else
{
......@@ -249,8 +249,8 @@ class Htmldomnode
// get dom node's inner html
function innertext()
{
if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
if (isset($this->_[YANGQI_HDOM_INFO_INNER])) return $this->_[YANGQI_HDOM_INFO_INNER];
if (isset($this->_[YANGQI_HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[YANGQI_HDOM_INFO_TEXT]);
$ret = '';
foreach ($this->nodes as $n)
......@@ -283,24 +283,24 @@ class Htmldomnode
call_user_func_array($this->dom->callback, array($this));
}
if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER];
if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
if (isset($this->_[YANGQI_HDOM_INFO_OUTER])) return $this->_[YANGQI_HDOM_INFO_OUTER];
if (isset($this->_[YANGQI_HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[YANGQI_HDOM_INFO_TEXT]);
// render begin tag
if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])
if ($this->dom && $this->dom->nodes[$this->_[YANGQI_HDOM_INFO_BEGIN]])
{
$ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
$ret = $this->dom->nodes[$this->_[YANGQI_HDOM_INFO_BEGIN]]->makeup();
} else {
$ret = "";
}
// render inner text
if (isset($this->_[HDOM_INFO_INNER]))
if (isset($this->_[YANGQI_HDOM_INFO_INNER]))
{
// If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
if ($this->tag != "br")
{
$ret .= $this->_[HDOM_INFO_INNER];
$ret .= $this->_[YANGQI_HDOM_INFO_INNER];
}
} else {
if ($this->nodes)
......@@ -313,7 +313,7 @@ class Htmldomnode
}
// render end tag
if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0)
if (isset($this->_[YANGQI_HDOM_INFO_END]) && $this->_[YANGQI_HDOM_INFO_END]!=0)
$ret .= '</'.$this->tag.'>';
return $ret;
}
......@@ -321,10 +321,10 @@ class Htmldomnode
// get dom node's plain text
function text()
{
if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
if (isset($this->_[YANGQI_HDOM_INFO_INNER])) return $this->_[YANGQI_HDOM_INFO_INNER];
switch ($this->nodetype)
{
case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
case YANGQI_HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[YANGQI_HDOM_INFO_TEXT]);
case HDOM_TYPE_COMMENT: return '';
case HDOM_TYPE_UNKNOWN: return '';
}
......@@ -365,7 +365,7 @@ class Htmldomnode
function makeup()
{
// text, comment, unknown
if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
if (isset($this->_[YANGQI_HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[YANGQI_HDOM_INFO_TEXT]);
$ret = '<'.$this->tag;
$i = -1;
......@@ -393,7 +393,7 @@ class Htmldomnode
}
}
$ret = $this->dom->restore_noise($ret);
return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
return $ret . $this->_[YANGQI_HDOM_INFO_ENDSPACE] . '>';
}
// find elements by css selector
......@@ -410,9 +410,9 @@ class Htmldomnode
// The change on the below line was documented on the sourceforge code tracker id 2788009
// used to be: if (($levle=count($selectors[0]))===0) return array();
if (($levle=count($selectors[$c]))===0) return array();
if (!isset($this->_[HDOM_INFO_BEGIN])) return array();
if (!isset($this->_[YANGQI_HDOM_INFO_BEGIN])) return array();
$head = array($this->_[HDOM_INFO_BEGIN]=>1);
$head = array($this->_[YANGQI_HDOM_INFO_BEGIN]=>1);
// handle descendant selectors, no recursive!
for ($l=0; $l<$levle; ++$l)
......@@ -466,7 +466,7 @@ class Htmldomnode
{
if ($tag==='*' || $tag===$c->tag) {
if (++$count==$key) {
$ret[$c->_[HDOM_INFO_BEGIN]] = 1;
$ret[$c->_[YANGQI_HDOM_INFO_BEGIN]] = 1;
return;
}
}
......@@ -474,17 +474,17 @@ class Htmldomnode
return;
}
$end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
$end = (!empty($this->_[YANGQI_HDOM_INFO_END])) ? $this->_[YANGQI_HDOM_INFO_END] : 0;
if ($end==0) {
$parent = $this->parent;
while (!isset($parent->_[HDOM_INFO_END]) && $parent!==null) {
while (!isset($parent->_[YANGQI_HDOM_INFO_END]) && $parent!==null) {
$end -= 1;
$parent = $parent->parent;
}
$end += $parent->_[HDOM_INFO_END];
$end += $parent->_[YANGQI_HDOM_INFO_END];
}
for ($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) {
for ($i=$this->_[YANGQI_HDOM_INFO_BEGIN]+1; $i<$end; ++$i) {
$node = $this->dom->nodes[$i];
$pass = true;
......@@ -641,10 +641,10 @@ class Htmldomnode
switch ($name)
{
case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
case 'outertext': return $this->_[YANGQI_HDOM_INFO_OUTER] = $value;
case 'innertext':
if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
return $this->_[HDOM_INFO_INNER] = $value;
if (isset($this->_[YANGQI_HDOM_INFO_TEXT])) return $this->_[YANGQI_HDOM_INFO_TEXT] = $value;
return $this->_[YANGQI_HDOM_INFO_INNER] = $value;
}
if (!isset($this->attr[$name]))
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment