Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
crawler.vmusicchart.vn
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Phạm Văn Đoan
crawler.vmusicchart.vn
Commits
6ad20bc1
Commit
6ad20bc1
authored
Jun 10, 2020
by
Phạm Văn Đoan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
test lấy lượt nghe ở trang NCT
parent
33a823a7
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
161 additions
and
29 deletions
+161
-29
CrawlerNctTrackListenCommand.php
app/Console/Commands/CrawlerNctTrackListenCommand.php
+66
-0
Kernel.php
app/Console/Kernel.php
+4
-1
Functions.php
app/Helpers/Functions.php
+29
-0
TrackRepository.php
app/Repositories/TrackRepository.php
+34
-2
app.php
config/app.php
+2
-0
Htmldomnode.php
packages/yangqi/htmldom/src/Yangqi/Htmldom/Htmldomnode.php
+26
-26
No files found.
app/Console/Commands/CrawlerNctTrackListenCommand.php
0 → 100644
View file @
6ad20bc1
<?php
namespace
App\Console\Commands
;
use
App\Repositories\TrackRepository
;
use
Illuminate\Console\Command
;
class CrawlerNctTrackListenCommand extends Command
{
    /**
     * Number of tracks processed per run when no valid {limit} argument is given.
     */
    const DEFAULT_LIMIT = 10;

    /**
     * The name and signature of the console command.
     *
     * The optional {limit} argument caps how many tracks are processed per run.
     *
     * @var string
     */
    protected $signature = 'crawler:get-nct-listen {limit?}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Lấy lượt nghe ở DOM từ trang của NCT';

    /**
     * Repository used to select tracks and to fetch their listen counters.
     *
     * @var TrackRepository
     */
    protected $trackRepository;

    /**
     * Create a new command instance.
     *
     * @param TrackRepository $trackRepository
     */
    public function __construct(TrackRepository $trackRepository)
    {
        parent::__construct();

        $this->trackRepository = $trackRepository;
    }

    /**
     * Execute the console command.
     *
     * Prints how many tracks already have a listen count recorded, how many
     * are selected for this run, and the raw data returned by the repository.
     *
     * @return int Exit code (0 on completion).
     */
    public function handle()
    {
        // Applied here rather than in the constructor so the setting is only
        // touched when this command actually runs; the console kernel may
        // instantiate registered commands even when a different one is executed.
        ini_set('max_execution_time', -1);

        $from_page = 'nct';
        $limit = $this->resolveLimit();

        $crawlered = $this->trackRepository->getCrawlerListenInserted($from_page, false);
        echo "\nTổng số bài hát đã lấy lượt nghe hôm nay: " . count($crawlered);

        // Fetch the tracks of this music source that still need a listen count.
        $inserted = $this->trackRepository->getTrackBySource($from_page, $crawlered, $limit);
        $total = count($inserted);
        echo "\nTổng số bài hát cần lấy lượt nghe: " . $total;

        $data = $this->trackRepository->getNctTrackListen($inserted);
        print_r($data);

        // Return to Artisan instead of die(), so the framework can finish its
        // own shutdown handling and callers receive an exit code.
        return 0;
    }

    /**
     * Read the optional {limit} argument declared in the signature.
     *
     * A missing, non-numeric, zero or negative value falls back to DEFAULT_LIMIT.
     *
     * @return int
     */
    private function resolveLimit()
    {
        $limit = (int) $this->argument('limit');

        return $limit > 0 ? $limit : self::DEFAULT_LIMIT;
    }
}
app/Console/Kernel.php
View file @
6ad20bc1
...
@@ -4,6 +4,7 @@ namespace App\Console;
...
@@ -4,6 +4,7 @@ namespace App\Console;
use
App\Console\Commands\CalculateDailyListenCommand
;
use
App\Console\Commands\CalculateDailyListenCommand
;
use
App\Console\Commands\CalculateReportCommand
;
use
App\Console\Commands\CalculateReportCommand
;
use
App\Console\Commands\CrawlerNctTrackListenCommand
;
use
App\Console\Commands\CrawlerTrackListenCommand
;
use
App\Console\Commands\CrawlerTrackListenCommand
;
use
App\Console\Commands\CrontjobCommand
;
use
App\Console\Commands\CrontjobCommand
;
use
App\Console\Commands\ExportRankingCommand
;
use
App\Console\Commands\ExportRankingCommand
;
...
@@ -49,7 +50,9 @@ class Kernel extends ConsoleKernel
...
@@ -49,7 +50,9 @@ class Kernel extends ConsoleKernel
ExportRankingCommand
::
class
,
ExportRankingCommand
::
class
,
SendWarningCommand
::
class
SendWarningCommand
::
class
,
CrawlerNctTrackListenCommand
::
class
,
];
];
...
...
app/Helpers/Functions.php
View file @
6ad20bc1
...
@@ -182,6 +182,35 @@ if (!function_exists('cURL')) {
...
@@ -182,6 +182,35 @@ if (!function_exists('cURL')) {
}
}
}
}
if (!function_exists('cURLFromNct')) {
    /**
     * Perform a GET request against nhaccuatui.com with browser-like headers.
     *
     * @param string $url URL to request.
     * @return string|false Response body, or false when cURL reports an error.
     */
    function cURLFromNct($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
        // An empty string makes cURL send an Accept-Encoding header listing
        // only the encodings this build can decode. The previous hard-coded
        // "gzip, deflate, br" (set both here and as a manual header) could ask
        // for brotli on a libcurl that cannot decompress it.
        curl_setopt($ch, CURLOPT_ENCODING, '');

        $headers = [
            'Authority: www.nhaccuatui.com',
            'Cache-Control: max-age=0',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language: vi,vi-VN;q=0.9,en-US;q=0.8,en;q=0.7',
        ];
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $result = curl_exec($ch);

        // Read the error state before closing, and always close the handle:
        // the earlier version returned on error without calling curl_close().
        $failed = curl_errno($ch) !== 0;
        curl_close($ch);

        return $failed ? false : $result;
    }
}
if
(
!
function_exists
(
'getZingUrlGetCounter'
))
{
if
(
!
function_exists
(
'getZingUrlGetCounter'
))
{
/**
/**
* Hàm tạo URL cho từng bài hát dựa vào ID chuỗi của nó để lấy lượt nghe
* Hàm tạo URL cho từng bài hát dựa vào ID chuỗi của nó để lấy lượt nghe
...
...
app/Repositories/TrackRepository.php
View file @
6ad20bc1
...
@@ -435,5 +435,38 @@ class TrackRepository extends BaseRepository
...
@@ -435,5 +435,38 @@ class TrackRepository extends BaseRepository
return
$tracks
;
return
$tracks
;
}
}
/**
 * Fetch the raw response of the NCT listen-counter API.
 *
 * NOTE(review): this is still a connectivity probe. It queries a single
 * hard-coded song id and does not read $inserted. The per-track loop that
 * used to follow the return statement was unreachable (and only downloaded
 * each track's 'link_nct' page without using the result), so it has been
 * removed; the effective behaviour is unchanged.
 *
 * @param mixed $inserted Tracks selected for crawling — presumably rows with
 *                        'id' and 'link_nct' keys; currently unused.
 * @return string|false Response body, or false when the request fails.
 */
public function getNctTrackListen($inserted)
{
    // TODO: build listSongIds from $inserted once the mapping from a track
    // row to its NCT song id is known.
    $probeUrl = 'https://www.nhaccuatui.com/interaction/api/counter?listSongIds=6217224';

    return file_get_contents($probeUrl);
}
}
}
\ No newline at end of file
config/app.php
View file @
6ad20bc1
...
@@ -169,6 +169,7 @@ return [
...
@@ -169,6 +169,7 @@ return [
Laravel\Tinker\TinkerServiceProvider
::
class
,
Laravel\Tinker\TinkerServiceProvider
::
class
,
Yangqi\Htmldom\HtmldomServiceProvider
::
class
,
Yangqi\Htmldom\HtmldomServiceProvider
::
class
,
Maatwebsite\Excel\ExcelServiceProvider
::
class
,
Maatwebsite\Excel\ExcelServiceProvider
::
class
,
Ixudra\Curl\CurlServiceProvider
::
class
,
/*
/*
* Application Service Providers...
* Application Service Providers...
...
@@ -229,6 +230,7 @@ return [
...
@@ -229,6 +230,7 @@ return [
'View'
=>
Illuminate\Support\Facades\View
::
class
,
'View'
=>
Illuminate\Support\Facades\View
::
class
,
'Htmldom'
=>
Yangqi\Htmldom\Htmldom
::
class
,
'Htmldom'
=>
Yangqi\Htmldom\Htmldom
::
class
,
'Excel'
=>
Maatwebsite\Excel\Facades\Excel
::
class
,
'Excel'
=>
Maatwebsite\Excel\Facades\Excel
::
class
,
'Curl'
=>
Ixudra\Curl\Facades\Curl
::
class
,
],
],
...
...
packages/yangqi/htmldom/src/Yangqi/Htmldom/Htmldomnode.php
View file @
6ad20bc1
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
*/
*/
class
Htmldomnode
class
Htmldomnode
{
{
public
$nodetype
=
HDOM_TYPE_TEXT
;
public
$nodetype
=
YANGQI_
HDOM_TYPE_TEXT
;
public
$tag
=
'text'
;
public
$tag
=
'text'
;
public
$attr
=
array
();
public
$attr
=
array
();
public
$children
=
array
();
public
$children
=
array
();
...
@@ -110,9 +110,9 @@ class Htmldomnode
...
@@ -110,9 +110,9 @@ class Htmldomnode
}
}
$string
.=
" HDOM_INNER_INFO: '"
;
$string
.=
" HDOM_INNER_INFO: '"
;
if
(
isset
(
$node
->
_
[
HDOM_INFO_INNER
]))
if
(
isset
(
$node
->
_
[
YANGQI_
HDOM_INFO_INNER
]))
{
{
$string
.=
$node
->
_
[
HDOM_INFO_INNER
]
.
"'"
;
$string
.=
$node
->
_
[
YANGQI_
HDOM_INFO_INNER
]
.
"'"
;
}
}
else
else
{
{
...
@@ -249,8 +249,8 @@ class Htmldomnode
...
@@ -249,8 +249,8 @@ class Htmldomnode
// get dom node's inner html
// get dom node's inner html
function
innertext
()
function
innertext
()
{
{
if
(
isset
(
$this
->
_
[
HDOM_INFO_INNER
]))
return
$this
->
_
[
HDOM_INFO_INNER
];
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_INNER
]))
return
$this
->
_
[
YANGQI_
HDOM_INFO_INNER
];
if
(
isset
(
$this
->
_
[
HDOM_INFO_TEXT
]))
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
HDOM_INFO_TEXT
]);
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_TEXT
]))
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
YANGQI_
HDOM_INFO_TEXT
]);
$ret
=
''
;
$ret
=
''
;
foreach
(
$this
->
nodes
as
$n
)
foreach
(
$this
->
nodes
as
$n
)
...
@@ -283,24 +283,24 @@ class Htmldomnode
...
@@ -283,24 +283,24 @@ class Htmldomnode
call_user_func_array
(
$this
->
dom
->
callback
,
array
(
$this
));
call_user_func_array
(
$this
->
dom
->
callback
,
array
(
$this
));
}
}
if
(
isset
(
$this
->
_
[
HDOM_INFO_OUTER
]))
return
$this
->
_
[
HDOM_INFO_OUTER
];
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_OUTER
]))
return
$this
->
_
[
YANGQI_
HDOM_INFO_OUTER
];
if
(
isset
(
$this
->
_
[
HDOM_INFO_TEXT
]))
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
HDOM_INFO_TEXT
]);
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_TEXT
]))
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
YANGQI_
HDOM_INFO_TEXT
]);
// render begin tag
// render begin tag
if
(
$this
->
dom
&&
$this
->
dom
->
nodes
[
$this
->
_
[
HDOM_INFO_BEGIN
]])
if
(
$this
->
dom
&&
$this
->
dom
->
nodes
[
$this
->
_
[
YANGQI_
HDOM_INFO_BEGIN
]])
{
{
$ret
=
$this
->
dom
->
nodes
[
$this
->
_
[
HDOM_INFO_BEGIN
]]
->
makeup
();
$ret
=
$this
->
dom
->
nodes
[
$this
->
_
[
YANGQI_
HDOM_INFO_BEGIN
]]
->
makeup
();
}
else
{
}
else
{
$ret
=
""
;
$ret
=
""
;
}
}
// render inner text
// render inner text
if
(
isset
(
$this
->
_
[
HDOM_INFO_INNER
]))
if
(
isset
(
$this
->
_
[
YANGQI_
HDOM_INFO_INNER
]))
{
{
// If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
// If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
if
(
$this
->
tag
!=
"br"
)
if
(
$this
->
tag
!=
"br"
)
{
{
$ret
.=
$this
->
_
[
HDOM_INFO_INNER
];
$ret
.=
$this
->
_
[
YANGQI_
HDOM_INFO_INNER
];
}
}
}
else
{
}
else
{
if
(
$this
->
nodes
)
if
(
$this
->
nodes
)
...
@@ -313,7 +313,7 @@ class Htmldomnode
...
@@ -313,7 +313,7 @@ class Htmldomnode
}
}
// render end tag
// render end tag
if
(
isset
(
$this
->
_
[
HDOM_INFO_END
])
&&
$this
->
_
[
HDOM_INFO_END
]
!=
0
)
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_END
])
&&
$this
->
_
[
YANGQI_
HDOM_INFO_END
]
!=
0
)
$ret
.=
'</'
.
$this
->
tag
.
'>'
;
$ret
.=
'</'
.
$this
->
tag
.
'>'
;
return
$ret
;
return
$ret
;
}
}
...
@@ -321,10 +321,10 @@ class Htmldomnode
...
@@ -321,10 +321,10 @@ class Htmldomnode
// get dom node's plain text
// get dom node's plain text
function
text
()
function
text
()
{
{
if
(
isset
(
$this
->
_
[
HDOM_INFO_INNER
]))
return
$this
->
_
[
HDOM_INFO_INNER
];
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_INNER
]))
return
$this
->
_
[
YANGQI_
HDOM_INFO_INNER
];
switch
(
$this
->
nodetype
)
switch
(
$this
->
nodetype
)
{
{
case
HDOM_TYPE_TEXT
:
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
HDOM_INFO_TEXT
]);
case
YANGQI_HDOM_TYPE_TEXT
:
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
YANGQI_
HDOM_INFO_TEXT
]);
case
HDOM_TYPE_COMMENT
:
return
''
;
case
HDOM_TYPE_COMMENT
:
return
''
;
case
HDOM_TYPE_UNKNOWN
:
return
''
;
case
HDOM_TYPE_UNKNOWN
:
return
''
;
}
}
...
@@ -365,7 +365,7 @@ class Htmldomnode
...
@@ -365,7 +365,7 @@ class Htmldomnode
function
makeup
()
function
makeup
()
{
{
// text, comment, unknown
// text, comment, unknown
if
(
isset
(
$this
->
_
[
HDOM_INFO_TEXT
]))
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
HDOM_INFO_TEXT
]);
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_TEXT
]))
return
$this
->
dom
->
restore_noise
(
$this
->
_
[
YANGQI_
HDOM_INFO_TEXT
]);
$ret
=
'<'
.
$this
->
tag
;
$ret
=
'<'
.
$this
->
tag
;
$i
=
-
1
;
$i
=
-
1
;
...
@@ -393,7 +393,7 @@ class Htmldomnode
...
@@ -393,7 +393,7 @@ class Htmldomnode
}
}
}
}
$ret
=
$this
->
dom
->
restore_noise
(
$ret
);
$ret
=
$this
->
dom
->
restore_noise
(
$ret
);
return
$ret
.
$this
->
_
[
HDOM_INFO_ENDSPACE
]
.
'>'
;
return
$ret
.
$this
->
_
[
YANGQI_
HDOM_INFO_ENDSPACE
]
.
'>'
;
}
}
// find elements by css selector
// find elements by css selector
...
@@ -410,9 +410,9 @@ class Htmldomnode
...
@@ -410,9 +410,9 @@ class Htmldomnode
// The change on the below line was documented on the sourceforge code tracker id 2788009
// The change on the below line was documented on the sourceforge code tracker id 2788009
// used to be: if (($levle=count($selectors[0]))===0) return array();
// used to be: if (($levle=count($selectors[0]))===0) return array();
if
((
$levle
=
count
(
$selectors
[
$c
]))
===
0
)
return
array
();
if
((
$levle
=
count
(
$selectors
[
$c
]))
===
0
)
return
array
();
if
(
!
isset
(
$this
->
_
[
HDOM_INFO_BEGIN
]))
return
array
();
if
(
!
isset
(
$this
->
_
[
YANGQI_
HDOM_INFO_BEGIN
]))
return
array
();
$head
=
array
(
$this
->
_
[
HDOM_INFO_BEGIN
]
=>
1
);
$head
=
array
(
$this
->
_
[
YANGQI_
HDOM_INFO_BEGIN
]
=>
1
);
// handle descendant selectors, no recursive!
// handle descendant selectors, no recursive!
for
(
$l
=
0
;
$l
<
$levle
;
++
$l
)
for
(
$l
=
0
;
$l
<
$levle
;
++
$l
)
...
@@ -466,7 +466,7 @@ class Htmldomnode
...
@@ -466,7 +466,7 @@ class Htmldomnode
{
{
if
(
$tag
===
'*'
||
$tag
===
$c
->
tag
)
{
if
(
$tag
===
'*'
||
$tag
===
$c
->
tag
)
{
if
(
++
$count
==
$key
)
{
if
(
++
$count
==
$key
)
{
$ret
[
$c
->
_
[
HDOM_INFO_BEGIN
]]
=
1
;
$ret
[
$c
->
_
[
YANGQI_
HDOM_INFO_BEGIN
]]
=
1
;
return
;
return
;
}
}
}
}
...
@@ -474,17 +474,17 @@ class Htmldomnode
...
@@ -474,17 +474,17 @@ class Htmldomnode
return
;
return
;
}
}
$end
=
(
!
empty
(
$this
->
_
[
HDOM_INFO_END
]))
?
$this
->
_
[
HDOM_INFO_END
]
:
0
;
$end
=
(
!
empty
(
$this
->
_
[
YANGQI_HDOM_INFO_END
]))
?
$this
->
_
[
YANGQI_
HDOM_INFO_END
]
:
0
;
if
(
$end
==
0
)
{
if
(
$end
==
0
)
{
$parent
=
$this
->
parent
;
$parent
=
$this
->
parent
;
while
(
!
isset
(
$parent
->
_
[
HDOM_INFO_END
])
&&
$parent
!==
null
)
{
while
(
!
isset
(
$parent
->
_
[
YANGQI_
HDOM_INFO_END
])
&&
$parent
!==
null
)
{
$end
-=
1
;
$end
-=
1
;
$parent
=
$parent
->
parent
;
$parent
=
$parent
->
parent
;
}
}
$end
+=
$parent
->
_
[
HDOM_INFO_END
];
$end
+=
$parent
->
_
[
YANGQI_
HDOM_INFO_END
];
}
}
for
(
$i
=
$this
->
_
[
HDOM_INFO_BEGIN
]
+
1
;
$i
<
$end
;
++
$i
)
{
for
(
$i
=
$this
->
_
[
YANGQI_
HDOM_INFO_BEGIN
]
+
1
;
$i
<
$end
;
++
$i
)
{
$node
=
$this
->
dom
->
nodes
[
$i
];
$node
=
$this
->
dom
->
nodes
[
$i
];
$pass
=
true
;
$pass
=
true
;
...
@@ -641,10 +641,10 @@ class Htmldomnode
...
@@ -641,10 +641,10 @@ class Htmldomnode
switch
(
$name
)
switch
(
$name
)
{
{
case
'outertext'
:
return
$this
->
_
[
HDOM_INFO_OUTER
]
=
$value
;
case
'outertext'
:
return
$this
->
_
[
YANGQI_
HDOM_INFO_OUTER
]
=
$value
;
case
'innertext'
:
case
'innertext'
:
if
(
isset
(
$this
->
_
[
HDOM_INFO_TEXT
]))
return
$this
->
_
[
HDOM_INFO_TEXT
]
=
$value
;
if
(
isset
(
$this
->
_
[
YANGQI_HDOM_INFO_TEXT
]))
return
$this
->
_
[
YANGQI_
HDOM_INFO_TEXT
]
=
$value
;
return
$this
->
_
[
HDOM_INFO_INNER
]
=
$value
;
return
$this
->
_
[
YANGQI_
HDOM_INFO_INNER
]
=
$value
;
}
}
if
(
!
isset
(
$this
->
attr
[
$name
]))
if
(
!
isset
(
$this
->
attr
[
$name
]))
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment