Commit 09338dae by Phạm Văn Đoan

tạo task lấy bài hát từ NCT

parent 7caf7995
<?php
namespace App\Console\Commands\Nct;
use App\Helpers\Constants;
use App\Models\Crontjob;
use App\Models\Track;
use App\Repositories\TrackRepository;
use Carbon\Carbon;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
/**
 * Console command that imports the NCT "top 100" chart into the tracks table.
 *
 * Chart titles found in the list returned by the repository are updated in
 * place with their NCT identifiers; all other titles are collected and
 * bulk-inserted in a single query.
 */
class NctCrawlerTrackCommand extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'nct:get-track';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Lấy thông tin bài hát từ NCT';

    /**
     * Repository used to list known tracks and to scrape the chart page.
     *
     * @var TrackRepository
     */
    protected $trackRepository;

    /**
     * Create a new command instance.
     *
     * @param TrackRepository $trackRepository
     */
    public function __construct(TrackRepository $trackRepository)
    {
        parent::__construct();
        $this->trackRepository = $trackRepository;
    }

    /**
     * Execute the console command.
     *
     * @return mixed false when the chart page yielded no titles, otherwise nothing.
     */
    public function handle()
    {
        echo "\nBắt đầu xử lý lấy top 100 bài hát từ NCT.";

        // NOTE(review): presumably the titles of tracks already stored (first
        // argument is 'title') — confirm against TrackRepository::getTrackInserted.
        $inserted = $this->trackRepository->getTrackInserted('title', 'original', []);
        $has_inserted = is_array($inserted) && count($inserted) > 0;

        // Fetch and parse the chart page.
        $url = config('api.nct.top100');
        $tracks = $this->trackRepository->processNctGetTrackInfoFromTop100($url);

        // The scraper only creates a key when its selector matched something,
        // so a missing 'title' key means "nothing scraped", not an error.
        if (!is_array($tracks) || empty($tracks['title'])) {
            return false;
        }

        $tracks_title = $tracks['title'];
        $tracks_img = isset($tracks['img']) ? $tracks['img'] : [];
        $tracks_id_nct_str = isset($tracks['id_nct_str']) ? $tracks['id_nct_str'] : [];
        $tracks_id_nct = isset($tracks['id_nct']) ? $tracks['id_nct'] : [];

        $bulk_track_insert = [];
        $key_update = 0;

        foreach ($tracks_title as $key => $track) {
            // The per-field lists are filled independently by the scraper and
            // may be shorter than the title list, so every lookup is guarded.
            $id_nct = isset($tracks_id_nct[$key]) ? $tracks_id_nct[$key] : null;
            $id_nct_str = isset($tracks_id_nct_str[$key]) ? $tracks_id_nct_str[$key] : null;
            $img = isset($tracks_img[$key]) ? $tracks_img[$key] : null;

            // Insert vs. update is decided by the track title alone.
            if ($has_inserted && in_array($track, $inserted, true)) {
                $track_db = Track::where('title', $track)->first();
                if ($track_db !== null) {
                    // Do not wipe identifiers already stored when the scraper
                    // could not provide one for this position.
                    if ($id_nct !== null) {
                        $track_db->id_nct = $id_nct;
                    }
                    if ($id_nct_str !== null) {
                        $track_db->id_nct_str = $id_nct_str;
                    }
                    $track_db->crawler_at = Carbon::now();
                    $track_db->save();
                    // Count only rows that were really saved.
                    $key_update++;
                }
                continue;
            }

            $bulk_track_insert[] = [
                'title' => $track,
                'slug' => null,
                'userid' => 0,
                'tag' => 'vmusic',
                'genre' => 1,
                'download_hash' => md5((string) $id_nct),
                'time' => time(),
                'id_nct' => $id_nct,
                'id_nct_str' => $id_nct_str,
                'src_thumbnail_medium' => $img,
                'crawler_at' => Carbon::now(),
            ];
        }

        $key_insert = count($bulk_track_insert);

        echo "\nSố bài hát insert: " . $key_insert;
        echo "\nSố bài hát update: " . $key_update;

        if ($key_insert > 0) {
            DB::table(Constants::TABLE_TRACKS)->insert($bulk_track_insert);
            echo "\nInsert thành công.";
        } else {
            echo "\nKhông lấy được bài hát nào.";
        }

        // Record that the job ran.
        Crontjob::create(['message' => 'NctCrawlerTrackCommand is called at ' . Carbon::now()]);
    }
}
<?php
namespace App\Console\Commands\Nct;
use App\Repositories\TrackRepository;
use Illuminate\Console\Command;
/**
 * Console command registered under the `nct:update-track-link` signature.
 *
 * Only the skeleton exists so far: the command receives the track repository
 * but handle() performs no work.
 */
class NctCrawlerUpdateTrackLinkCommand extends Command
{
    /**
     * Name and signature used to invoke the command from the console.
     *
     * @var string
     */
    protected $signature = 'nct:update-track-link';

    /**
     * Human-readable description of the command.
     *
     * @var string
     */
    protected $description = 'Cập nhật link bài hát từ NCT';

    /**
     * Track repository injected through the constructor.
     *
     * @var TrackRepository
     */
    protected $trackRepository;

    /**
     * NctCrawlerUpdateTrackLinkCommand constructor.
     *
     * @param TrackRepository $trackRepository
     */
    public function __construct(TrackRepository $trackRepository)
    {
        parent::__construct();
        $this->trackRepository = $trackRepository;
    }

    /**
     * Run the command.
     *
     * @return mixed
     */
    public function handle()
    {
        // Intentionally empty: no link-update logic has been written yet.
    }
}
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
namespace App\Console; namespace App\Console;
use App\Console\Commands\CrontjobCommand; use App\Console\Commands\CrontjobCommand;
use App\Console\Commands\Nct\NctCrawlerTrackCommand;
use App\Console\Commands\Zing\ZingCrawlerNewRealeaseCommand; use App\Console\Commands\Zing\ZingCrawlerNewRealeaseCommand;
use App\Console\Commands\Zing\ZingCrawlerTrackCommand; use App\Console\Commands\Zing\ZingCrawlerTrackCommand;
use App\Console\Commands\Zing\ZingCrawlerTrackImageCommand; use App\Console\Commands\Zing\ZingCrawlerTrackImageCommand;
...@@ -21,6 +22,8 @@ class Kernel extends ConsoleKernel ...@@ -21,6 +22,8 @@ class Kernel extends ConsoleKernel
ZingCrawlerTrackCommand::class, ZingCrawlerTrackCommand::class,
ZingCrawlerNewRealeaseCommand::class, ZingCrawlerNewRealeaseCommand::class,
ZingCrawlerTrackImageCommand::class, ZingCrawlerTrackImageCommand::class,
NctCrawlerTrackCommand::class,
]; ];
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
namespace App\Repositories; namespace App\Repositories;
use App\Models\Track; use App\Models\Track;
use Sunra\PhpSimple\HtmlDomParser;
use Yangqi\Htmldom\Htmldom;
class TrackRepository extends BaseRepository class TrackRepository extends BaseRepository
{ {
...@@ -64,5 +66,95 @@ class TrackRepository extends BaseRepository ...@@ -64,5 +66,95 @@ class TrackRepository extends BaseRepository
return $query->get()->toArray(); return $query->get()->toArray();
} }
/**
 * Scrape track metadata from an NCT "top 100" chart page.
 *
 * Every list in the result is filled in document order by its own CSS
 * selector (or regex), independently of the others.
 * NOTE(review): because 'id_nct' only receives an entry when the counter
 * markup matches, its positions are not guaranteed to line up with 'title';
 * callers should guard per-index access.
 *
 * @param string $url Address of the chart page to load.
 * @return array Lists keyed by 'title', 'img', 'link', 'singer',
 *               'url_profile', 'id_nct_str' and 'id_nct'. Every key is always
 *               present, holding an empty list when nothing matched.
 */
public function processNctGetTrackInfoFromTop100($url)
{
    // Pre-seed all keys so callers can count()/index them without first
    // checking whether a selector happened to match.
    $tracks = [
        'title' => [],
        'img' => [],
        'link' => [],
        'singer' => [],
        'url_profile' => [],
        'id_nct_str' => [],
        'id_nct' => [],
    ];

    // NOTE(review): getDom() is defined outside this class; assumed to return
    // a simple_html_dom-style object, or a non-object on failure — confirm.
    $dom = getDom($url);
    if (!is_object($dom)) {
        return $tracks;
    }

    // Fragments stripped from scraped values, kept exactly as before.
    $leftover_markup = ['<h2 class="card-header">', '</h2>'];

    /* Track title and track link come from the same anchor */
    foreach ($dom->find('div.box_info_field h3.h3 a') as $anchor) {
        $tracks['title'][] = str_replace($leftover_markup, '', $anchor->innertext);
        $tracks['link'][] = str_replace($leftover_markup, '', $anchor->href);
    }

    /* Track image URL (read from the data-src attribute) */
    foreach ($dom->find('div.box_info_field a img') as $image) {
        $tracks['img'][] = str_replace($leftover_markup, '', $image->{"data-src"});
    }

    /* Singer name and singer profile URL come from the same anchor */
    foreach ($dom->find('div.box_info_field h4.list_name_singer a') as $anchor) {
        $tracks['singer'][] = str_replace($leftover_markup, '', $anchor->innertext);
        $tracks['url_profile'][] = str_replace($leftover_markup, '', $anchor->href);
    }

    /* String-form track ID: the button id without its fixed prefix */
    foreach ($dom->find('div.box_song_action a.button_add_playlist') as $button) {
        $tracks['id_nct_str'][] = str_replace(['btnShowBoxPlaylist_', '</h2>'], '', $button->id);
    }

    /* Numeric-form track ID, captured from the id of the listen-counter span */
    $rule = '/<span class=\"export_listen\" id="NCTCounter__(.*)">0<\/span>/msU';
    foreach ($dom->find('div.box_info_field') as $box) {
        // Comment delimiters are removed first so that a span wrapped in an
        // HTML comment is matched as well.
        $data_crawler = str_replace(['<!--', '-->'], '', (string) $box);
        if (preg_match($rule, $data_crawler, $result) === 1) {
            $tracks['id_nct'][] = $result[1];
        }
    }

    return $tracks;
}
/**
 * Scrape track identifiers and links from an NCT "top 100" playlist page.
 *
 * Each list is filled in document order by its own selector, independently
 * of the others.
 *
 * @param string $url Address of the playlist page, passed to Htmldom.
 * @return array Lists keyed by 'id_nct_str', 'link' and 'id_nct'. Every key
 *               is always present, holding an empty list when nothing matched.
 */
public function processNctGetTrackInfoFromTop100Playlist($url)
{
    // Pre-seed all keys so callers never hit an undefined index.
    $tracks = [
        'id_nct_str' => [],
        'link' => [],
        'id_nct' => [],
    ];

    $dom = new Htmldom($url);

    /* String-form track ID: the "key" attribute of each list item */
    foreach ($dom->find('li') as $item) {
        if (isset($item->key)) {
            $tracks['id_nct_str'][] = (string) $item->key;
        }
    }

    /* Track link: href of anchors whose class is exactly "button_new_window" */
    foreach ($dom->find('li a') as $anchor) {
        if (isset($anchor->class) && $anchor->class === 'button_new_window') {
            // Cast keeps the value a string even when href is absent.
            $tracks['link'][] = (string) $anchor->href;
        }
    }

    /* Numeric-form track ID: the counter span id without its prefix */
    foreach ($dom->find('li span') as $span) {
        if (isset($span->id) && isset($span->class) && $span->class === 'icon_listen') {
            $tracks['id_nct'][] = str_replace(['NCTCounter_sg_', 'NCTCounter_pn_'], '', $span->id);
        }
    }

    return $tracks;
}
} }
\ No newline at end of file
...@@ -6,8 +6,11 @@ ...@@ -6,8 +6,11 @@
"type": "project", "type": "project",
"require": { "require": {
"php": ">=5.6.4", "php": ">=5.6.4",
"ixudra/curl": "^6.19",
"laravel/framework": "5.4.*", "laravel/framework": "5.4.*",
"laravel/tinker": "~1.0" "laravel/tinker": "~1.0",
"sunra/php-simple-html-dom-parser": "^1.5",
"yangqi/htmldom": "^1.0"
}, },
"require-dev": { "require-dev": {
"fzaninotto/faker": "~1.4", "fzaninotto/faker": "~1.4",
......
...@@ -62,7 +62,13 @@ return [ ...@@ -62,7 +62,13 @@ return [
'playlist' => 'https://zingmp3.vn/api/playlist/get-playlist-detail?id=ZWZB969E&ctime=1584213996&sig=e295c92bed3ce58a29a2fb8c69f9e1934bab807ae6f43c546eeb7ca4242be599171c9566f23296fdc137ee156fd307db4cfa1a4b528e5d99773065ee10c6a603&api_key=38e8643fb0dc04e8d65b99994d3dafff', 'playlist' => 'https://zingmp3.vn/api/playlist/get-playlist-detail?id=ZWZB969E&ctime=1584213996&sig=e295c92bed3ce58a29a2fb8c69f9e1934bab807ae6f43c546eeb7ca4242be599171c9566f23296fdc137ee156fd307db4cfa1a4b528e5d99773065ee10c6a603&api_key=38e8643fb0dc04e8d65b99994d3dafff',
'new_release' => 'https://zingmp3.vn/api/chart/get-chart-new-release?ctime=1584214467&sig=66218d539436572b65c74d0c852550f7c7df0a3128aca9b5be13aa3dcd1d7c0bf49e3af459f75db87e3cc08f60c087684a0c92375ab3a8a784253502bbdf065f&api_key=38e8643fb0dc04e8d65b99994d3dafff', 'new_release' => 'https://zingmp3.vn/api/chart/get-chart-new-release?ctime=1584214467&sig=66218d539436572b65c74d0c852550f7c7df0a3128aca9b5be13aa3dcd1d7c0bf49e3af459f75db87e3cc08f60c087684a0c92375ab3a8a784253502bbdf065f&api_key=38e8643fb0dc04e8d65b99994d3dafff',
'artist' => 'https://zingmp3.vn/api/artist/get-list?id=IWZ9Z08I&type=genre&sort=listen&start=1&count=1000&ctime=1584216597&sig=e2b8e1e48ccd12f560959913a6901fc76c9b93e1cafecba2d3517d8ab57eeb44717aa41eca70338bcf8bc8c84def1c43367ef44c887589db4ed81a1e2c042303&api_key=38e8643fb0dc04e8d65b99994d3dafff', 'artist' => 'https://zingmp3.vn/api/artist/get-list?id=IWZ9Z08I&type=genre&sort=listen&start=1&count=1000&ctime=1584216597&sig=e2b8e1e48ccd12f560959913a6901fc76c9b93e1cafecba2d3517d8ab57eeb44717aa41eca70338bcf8bc8c84def1c43367ef44c887589db4ed81a1e2c042303&api_key=38e8643fb0dc04e8d65b99994d3dafff',
] ],
'nct' => [
'domain' => 'https://www.nhaccuatui.com/',
'top100' => 'https://www.nhaccuatui.com/top100/top-100-nhac-tre.m3liaiy6vVsF.html',
'playlist_top100' => 'https://www.nhaccuatui.com/playlist/top-100-nhac-tre-hay-nhat-va.m3liaiy6vVsF.html'
],
......
...@@ -167,6 +167,7 @@ return [ ...@@ -167,6 +167,7 @@ return [
* Package Service Providers... * Package Service Providers...
*/ */
Laravel\Tinker\TinkerServiceProvider::class, Laravel\Tinker\TinkerServiceProvider::class,
Yangqi\Htmldom\HtmldomServiceProvider::class,
/* /*
* Application Service Providers... * Application Service Providers...
...@@ -225,6 +226,7 @@ return [ ...@@ -225,6 +226,7 @@ return [
'URL' => Illuminate\Support\Facades\URL::class, 'URL' => Illuminate\Support\Facades\URL::class,
'Validator' => Illuminate\Support\Facades\Validator::class, 'Validator' => Illuminate\Support\Facades\Validator::class,
'View' => Illuminate\Support\Facades\View::class, 'View' => Illuminate\Support\Facades\View::class,
'Htmldom' => Yangqi\Htmldom\Htmldom::class,
], ],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment