Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
crawler.vmusicchart.vn
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Phạm Văn Đoan
crawler.vmusicchart.vn
Commits
f63f3efd
Commit
f63f3efd
authored
Mar 29, 2020
by
Phạm Văn Đoan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
lấy lượt nghe bài hát
parent
f3fbe27b
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
299 additions
and
1 deletion
+299
-1
CrawlerTrackListenCommand.php
app/Console/Commands/CrawlerTrackListenCommand.php
+141
-0
Kernel.php
app/Console/Kernel.php
+6
-0
Constants.php
app/Helpers/Constants.php
+10
-0
Functions.php
app/Helpers/Functions.php
+0
-0
CrawlerListen.php
app/Models/CrawlerListen.php
+26
-0
CrawlerListenHistory.php
app/Models/CrawlerListenHistory.php
+21
-0
Crontjob.php
app/Models/Crontjob.php
+2
-1
TrackRepository.php
app/Repositories/TrackRepository.php
+86
-0
api.php
config/api.php
+7
-0
No files found.
app/Console/Commands/CrawlerTrackListenCommand.php
0 → 100644
View file @
f63f3efd
<?php
namespace
App\Console\Commands
;
use
App\Models\CrawlerListen
;
use
App\Models\Crontjob
;
use
App\Repositories\TrackRepository
;
use
Carbon\Carbon
;
use
Illuminate\Console\Command
;
class CrawlerTrackListenCommand extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'crawler:get-listen {from_page?}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Lấy lượt nghe của bài hát ở các hệ thống dựa vào tham số truyền vào';

    /**
     * Repository used to find tracks to crawl and tracks already crawled.
     *
     * @var TrackRepository
     */
    protected $trackRepository;

    /**
     * Create a new command instance.
     *
     * @param TrackRepository $trackRepository
     */
    public function __construct(TrackRepository $trackRepository)
    {
        parent::__construct();
        $this->trackRepository = $trackRepository;
    }

    /**
     * Execute the console command.
     *
     * Resolves the music source (from the `from_page` argument, or by prompting
     * when it is missing or not one of the known sources), fetches a listen
     * count for every track the repository returns for that source, and stores
     * the positive counts in CrawlerListen. Progress is written with echo.
     *
     * Only 'zing' and 'nct' have a fetch and a store implementation in this
     * class; for 'nhacvn' and 'keeng' every count comes back as 0 and nothing
     * is stored.
     *
     * @return void
     */
    public function handle()
    {
        // Log the invocation so it is possible to verify the command was called.
        Crontjob::create(['message' => 'CrawlerTrackListenCommand is called at ' . Carbon::now()]);

        $sources = ['zing', 'nct', 'nhacvn', 'keeng'];
        $from_page = $this->argument('from_page');
        if (empty($from_page) || !in_array($from_page, $sources, true)) {
            $from_page = $this->choice('Chọn trang nhạc ', $sources);
        }
        echo "\nBắt đầu xử lý lấy lượt nghe từ: " . $from_page;

        // Track ids that already have a listen count for this source.
        $crawlered = $this->trackRepository->getCrawlerListenInserted($from_page, false);
        echo "\nTổng số bài hát đã lấy lượt nghe hôm nay: " . count($crawlered);

        // Tracks of this source that still need a listen count.
        $inserted = $this->trackRepository->getTrackBySource($from_page, $crawlered);
        echo "\nTổng số bài hát cần lấy lượt nghe: " . count($inserted);

        // Collect positive counts keyed by track id; zero results are reported but not stored.
        $data_listen = [];
        foreach ($inserted as $key => $track) {
            $listen = $this->privateGetListen($from_page, $track);
            if ($listen > 0) {
                $data_listen[$track['id']] = $listen;
            }
            echo "\n" . ($key + 1) . ". " . $track['title'] . " (Track ID= " . $track['id'] . "): " . $listen;
        }

        foreach ($data_listen as $track_id => $listen) {
            switch ($from_page) {
                case 'zing':
                    CrawlerListen::updateOrCreate(
                        ['track_id' => $track_id],
                        ['today_zing' => $listen, 'zing_crawler_at' => Carbon::now()]
                    );
                    break;
                case 'nct':
                    CrawlerListen::updateOrCreate(
                        ['track_id' => $track_id],
                        ['today_nct' => $listen, 'nct_crawler_at' => Carbon::now()]
                    );
                    break;
            }
        }

        echo "\nLấy số lượt nghe kết thúc.";
    }

    /**
     * Fetch the listen count of one track through the music site's API.
     *
     * The URL builders (getZingUrlGetCounter, getNctUrlGetCounter) and cURL()
     * are global helpers defined outside this class; the response is expected
     * to be JSON. Any response that cannot be decoded, reports an error, or
     * lacks a positive count yields 0.
     *
     * @param string $src   Music source; only 'zing' and 'nct' are handled.
     * @param array  $track Track row; reads 'id_zing' or 'id_nct' depending on $src.
     * @return int Listen count as reported by the API, or 0 when unavailable.
     */
    private function privateGetListen($src, $track)
    {
        switch ($src) {
            case 'zing':
                $response = json_decode(cURL(getZingUrlGetCounter($track['id_zing'])));
                if (isset($response->err, $response->data->total)
                    && $response->err == 0
                    && $response->data->total > 0
                ) {
                    return $response->data->total;
                }
                return 0;

            case 'nct':
                $song_id = $track['id_nct'];
                $response = json_decode(cURL(getNctUrlGetCounter($song_id)));
                // isset() on the per-song property guards against responses that
                // omit this song (or return `songs` as a list); reading the
                // property directly would raise an undefined-property error.
                if (isset($response->error_code, $response->data->songs->{$song_id})
                    && $response->error_code == 0
                    && $response->data->songs->{$song_id} > 0
                ) {
                    return $response->data->songs->{$song_id};
                }
                return 0;
        }

        return 0;
    }
}
app/Console/Kernel.php
View file @
f63f3efd
...
...
@@ -2,10 +2,12 @@
namespace
App\Console
;
use
App\Console\Commands\CrawlerTrackListenCommand
;
use
App\Console\Commands\CrontjobCommand
;
use
App\Console\Commands\Keeng\KeengCrawlerTrackCommand
;
use
App\Console\Commands\Nct\NctCrawlerTrackCommand
;
use
App\Console\Commands\NhacVn\NhacVnCrawlerTrackCommand
;
use
App\Console\Commands\Zing\ZingCrawlerListenCommand
;
use
App\Console\Commands\Zing\ZingCrawlerNewRealeaseCommand
;
use
App\Console\Commands\Zing\ZingCrawlerTrackCommand
;
use
App\Console\Commands\CrawlerTrackImageCommand
;
...
...
@@ -28,6 +30,10 @@ class Kernel extends ConsoleKernel
KeengCrawlerTrackCommand
::
class
,
CrawlerTrackImageCommand
::
class
,
//ZingCrawlerListenCommand::class,
CrawlerTrackListenCommand
::
class
];
/**
...
...
app/Helpers/Constants.php
View file @
f63f3efd
...
...
@@ -5,6 +5,11 @@ namespace App\Helpers;
class
Constants
{
const
TABLE_ARTISTS
=
'artists'
;
const
TABLE_COUNT_VIEWS
=
'count_views'
;
const
TABLE_CRAWLER_LISTEN_HISTORIES
=
'crawler_listen_histories'
;
const
TABLE_CRAWLER_LISTENS
=
'crawler_listens'
;
const
TABLE_CRONTJOBS
=
'crontjobs'
;
const
TABLE_FAILED_JOBS
=
'failed_jobs'
;
const
TABLE_JOBS
=
'jobs'
;
const
TABLE_MIGRATIONS
=
'migrations'
;
...
...
@@ -19,9 +24,11 @@ class Constants
const
TABLE_PERMISSIONS
=
'permissions'
;
const
TABLE_ROLE_USER
=
'role_user'
;
const
TABLE_ROLES
=
'roles'
;
const
TABLE_TRACK_ARTIST
=
'track_artist'
;
const
TABLE_TRACK_USER
=
'track_user'
;
const
TABLE_TRACKS
=
'tracks'
;
const
TABLE_USER_LOGS
=
'user_logs'
;
const
TABLE_USERS
=
'users'
;
...
...
@@ -117,4 +124,6 @@ class Constants
const
VMUSICCHART_IMG_EMPTY_JPG
=
'/home/aseanvn/public_html/vmusicchart.dcv.vn/uploads/empty-1x1-jpg.jpg'
;
const
VMUSICCHART_IMG_EMPTY_GIF
=
'/home/aseanvn/public_html/vmusicchart.dcv.vn/uploads/empty-1x1-gif.gif'
;
const
CRAWLER_LISTEN_LIMIT
=
500
;
}
\ No newline at end of file
app/Helpers/Functions.php
View file @
f63f3efd
This diff is collapsed.
Click to expand it.
app/Models/CrawlerListen.php
0 → 100644
View file @
f63f3efd
<?php
namespace
App\Models
;
use
App\Helpers\Constants
;
use
Illuminate\Database\Eloquent\Model
;
/**
 * Eloquent model for the crawler listen-count table (Constants::TABLE_CRAWLER_LISTENS).
 *
 * One row is keyed by track_id and carries, per music source (zing, nct,
 * nhacvn, keeng), a set of period counters plus the time of the last crawl.
 */
class CrawlerListen extends Model
{
    protected $table = Constants::TABLE_CRAWLER_LISTENS;

    public $timestamps = true;

    /**
     * Mass-assignable attributes.
     *
     * 'month_nhacvn' replaces a second, duplicated 'week_nhacvn' entry so that
     * every period lists all four sources.
     *
     * @var array
     */
    protected $fillable = [
        'track_id',
        'yesterday_zing', 'yesterday_nct', 'yesterday_nhacvn', 'yesterday_keeng',
        'today_zing', 'today_nct', 'today_nhacvn', 'today_keeng',
        'date_zing', 'date_nct', 'date_nhacvn', 'date_keeng',
        'week_zing', 'week_nct', 'week_nhacvn', 'week_keeng',
        'month_zing', 'month_nct', 'month_nhacvn', 'month_keeng',
        'year_zing', 'year_nct', 'year_nhacvn', 'year_keeng',
        'alltime_zing', 'alltime_nct', 'alltime_nhacvn', 'alltime_keeng',
        'zing_crawler_at', 'nct_crawler_at', 'nhacvn_crawler_at', 'keeng_crawler_at',
    ];
}
app/Models/CrawlerListenHistory.php
0 → 100644
View file @
f63f3efd
<?php
namespace
App\Models
;
use
App\Helpers\Constants
;
use
Illuminate\Database\Eloquent\Model
;
/**
 * Eloquent model for the listen-count history table
 * (Constants::TABLE_CRAWLER_LISTEN_HISTORIES).
 */
class CrawlerListenHistory extends Model
{
    /**
     * Let Eloquent maintain the created_at / updated_at columns.
     *
     * @var bool
     */
    public $timestamps = true;

    /**
     * Backing table name.
     *
     * @var string
     */
    protected $table = Constants::TABLE_CRAWLER_LISTEN_HISTORIES;

    /**
     * Mass-assignable attributes: the track plus one count per music source.
     *
     * @var array
     */
    protected $fillable = [
        'track_id',
        'zing_count',
        'nct_count',
        'nhacvn_count',
        'keeng_count',
    ];
}
app/Models/Crontjob.php
View file @
f63f3efd
...
...
@@ -2,11 +2,12 @@
namespace
App\Models
;
use
App\Helpers\Constants
;
use
Illuminate\Database\Eloquent\Model
;
class
Crontjob
extends
Model
{
protected
$table
=
'crontjobs'
;
protected
$table
=
Constants
::
TABLE_CRONTJOBS
;
public
$timestamps
=
true
;
...
...
app/Repositories/TrackRepository.php
View file @
f63f3efd
...
...
@@ -2,6 +2,8 @@
namespace
App\Repositories
;
use
App\Helpers\Constants
;
use
App\Models\CrawlerListen
;
use
App\Models\Track
;
use
Sunra\PhpSimple\HtmlDomParser
;
use
Yangqi\Htmldom\Htmldom
;
...
...
@@ -279,5 +281,88 @@ class TrackRepository extends BaseRepository
return
$result
;
}
/**
 * Fetch a batch of tracks that still need a listen count for one music source.
 *
 * Tracks whose ids are listed in $crawlered are skipped, and only tracks with
 * a non-null, non-empty id for the requested source are returned. A $src that
 * matches none of the known sources applies no source filter.
 *
 * $crawlered defaults to an empty array so the optional $src is no longer
 * followed by a required parameter (deprecated since PHP 8.0); existing
 * callers that pass it are unaffected.
 *
 * @param string $src       Music source: 'zing', 'nct', 'nhacvn' or 'keeng'.
 * @param array  $crawlered Track ids to exclude; ignored when empty or not an array.
 * @param int    $limit     Maximum number of rows; values below 1 fall back to
 *                          Constants::CRAWLER_LISTEN_LIMIT.
 *
 * @return array Array form of the matching Track models (selected columns:
 *               id, id_zing, id_nct, id_nhacvn, id_keeng, title).
 */
public function getTrackBySource($src = 'zing', $crawlered = [], $limit = Constants::CRAWLER_LISTEN_LIMIT)
{
    $query = Track::select(['id', 'id_zing', 'id_nct', 'id_nhacvn', 'id_keeng', 'title']);

    if (is_array($crawlered) && count($crawlered) > 0) {
        $query->whereNotIn('id', $crawlered);
    }

    // Column holding the track's id on the requested source, when the source is known.
    $source_column = null;
    switch ($src) {
        case 'zing':
            $source_column = 'id_zing';
            break;
        case 'nct':
            $source_column = 'id_nct';
            break;
        case 'nhacvn':
            $source_column = 'id_nhacvn';
            break;
        case 'keeng':
            $source_column = 'id_keeng';
            break;
    }

    if ($source_column !== null) {
        $query->whereNotNull($source_column)->where($source_column, '<>', '');
    }

    $query->take(($limit < 1) ? Constants::CRAWLER_LISTEN_LIMIT : $limit);

    return $query->get()->toArray();
}
/**
 * List the ids of tracks that already have a listen count for a music source.
 *
 * A row counts as "already crawled" when the source's *_crawler_at timestamp
 * falls inside the date window and the source's today_* counter is above 0,
 * so rows whose counter is still 0 are left out and get picked up again.
 * The window is the current calendar day; when $is_update_all is true it is
 * shifted one day ahead (presumably so that no row matches and every track is
 * refreshed - confirm with the caller's intent).
 *
 * A $src that matches none of the known sources applies no filter at all.
 *
 * @param string  $src           Music source: 'zing', 'nct', 'nhacvn' or 'keeng'.
 * @param boolean $is_update_all Shift the window to the following day.
 *
 * @return array track_id values of the matching CrawlerListen rows.
 */
public function getCrawlerListenInserted($src = 'zing', $is_update_all = false)
{
    // Reference moment for the window: now, or 24 hours from now.
    $reference = $is_update_all ? time() + 86400 : time();
    $window_start = date('Y-m-d 00:00:00', $reference);
    $window_end = date('Y-m-d 23:59:59', $reference);

    // Pick the timestamp / counter column pair that belongs to the source.
    $crawled_at_column = null;
    $today_column = null;
    switch ($src) {
        case 'zing':
            $crawled_at_column = 'zing_crawler_at';
            $today_column = 'today_zing';
            break;
        case 'nct':
            $crawled_at_column = 'nct_crawler_at';
            $today_column = 'today_nct';
            break;
        case 'nhacvn':
            $crawled_at_column = 'nhacvn_crawler_at';
            $today_column = 'today_nhacvn';
            break;
        case 'keeng':
            $crawled_at_column = 'keeng_crawler_at';
            $today_column = 'today_keeng';
            break;
    }

    $query = CrawlerListen::select(['id', 'track_id']);
    if ($crawled_at_column !== null) {
        $query->where($crawled_at_column, '>=', $window_start)
            ->where($crawled_at_column, '<=', $window_end)
            ->where($today_column, '>', 0);
    }

    $track_ids = [];
    foreach ($query->get() as $row) {
        $track_ids[] = $row->track_id;
    }

    return $track_ids;
}
}
\ No newline at end of file
config/api.php
View file @
f63f3efd
...
...
@@ -85,6 +85,13 @@ return [
'api_get_rank'
=>
'http://vip.service.keeng.vn:8080/KeengWSRestful//ws/common/getRankDetail?item_type=1&rank_type=50'
],
'listen'
=>
[
'zing'
=>
'https://mp3.zing.vn/xhr/get-counter?id=ZWB0EF7E&type=audio'
,
'nct'
=>
'https://www.nhaccuatui.com/interaction/api/counter?listSongIds=6238530'
,
'nhacvn'
=>
null
,
'keeng'
=>
null
],
];
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment