From 09338dae7663264afdc930a023d960ed2fa6abfe Mon Sep 17 00:00:00 2001
From: doanpv <doanpv@dcv.vn>
Date: Thu, 26 Mar 2020 08:46:20 +0700
Subject: [PATCH] tạo task lấy bài hát từ NCT

---
 app/Console/Commands/Nct/NctCrawlerTrackCommand.php           | 132 ++++++++++++++++++++++++++++++++++++++++
 app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php |  55 +++++++++++++++++
 app/Console/Kernel.php                                        |   3 +
 app/Repositories/TrackRepository.php                          | 103 +++++++++++++++++++++++++++++++
 composer.json                                                 |   5 +-
 config/api.php                                                |   8 ++-
 config/app.php                                                |   2 +
 7 files changed, 306 insertions(+), 2 deletions(-)
 create mode 100644 app/Console/Commands/Nct/NctCrawlerTrackCommand.php
 create mode 100644 app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php

diff --git a/app/Console/Commands/Nct/NctCrawlerTrackCommand.php b/app/Console/Commands/Nct/NctCrawlerTrackCommand.php
new file mode 100644
index 0000000..2d77ddf
--- /dev/null
+++ b/app/Console/Commands/Nct/NctCrawlerTrackCommand.php
@@ -0,0 +1,132 @@
+<?php
+
+namespace App\Console\Commands\Nct;
+
+use App\Helpers\Constants;
+use App\Models\Crontjob;
+use App\Models\Track;
+use App\Repositories\TrackRepository;
+use Carbon\Carbon;
+use Illuminate\Console\Command;
+use Illuminate\Support\Facades\DB;
+
+/**
+ * Scrapes the NCT (nhaccuatui.com) "Top 100" chart and stores its tracks.
+ *
+ * Titles that already exist in the tracks table get their NCT identifiers
+ * refreshed; every other title is bulk-inserted as a new row.
+ */
+class NctCrawlerTrackCommand extends Command
+{
+    /**
+     * The name and signature of the console command.
+     *
+     * @var string
+     */
+    protected $signature = 'nct:get-track';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Lấy thông tin bài hát từ NCT';
+
+    /**
+     * Repository used to read stored tracks and to scrape the chart page.
+     *
+     * @var TrackRepository
+     */
+    protected $trackRepository;
+
+    /**
+     * Create a new command instance.
+     *
+     * @param TrackRepository $trackRepository
+     */
+    public function __construct(TrackRepository $trackRepository)
+    {
+        parent::__construct();
+        $this->trackRepository = $trackRepository;
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * @return mixed false when the chart yielded no track titles, null otherwise
+     */
+    public function handle()
+    {
+        echo "\nBắt đầu xử lý lấy top 100 bài hát từ NCT.";
+
+        // Titles of the tracks that are already stored.
+        $inserted = $this->trackRepository->getTrackInserted('title', 'original', []);
+        if (!is_array($inserted)) {
+            $inserted = [];
+        }
+
+        // Scrape the chart page.
+        $tracks = $this->trackRepository->processNctGetTrackInfoFromTop100(config('api.nct.top100'));
+
+        // empty() also covers a missing 'title' key, which count() would choke on.
+        if (empty($tracks['title'])) {
+            return false;
+        }
+
+        $now = Carbon::now();
+        $bulk_track_insert = [];
+        $key_update = 0;
+        foreach ($tracks['title'] as $key => $title) {
+            // The scraper returns one list per field; skip a row whose identifiers
+            // are missing instead of failing on an undefined offset.
+            if (!isset($tracks['id_nct'][$key], $tracks['id_nct_str'][$key])) {
+                continue;
+            }
+            $id_nct = $tracks['id_nct'][$key];
+            $id_nct_str = $tracks['id_nct_str'][$key];
+
+            // Insert or update is decided by the track title alone.
+            if (in_array($title, $inserted, true)) {
+                $track_db = Track::where('title', $title)->first();
+                if ($track_db !== null) {
+                    $track_db->id_nct = $id_nct;
+                    $track_db->id_nct_str = $id_nct_str;
+                    $track_db->crawler_at = $now;
+                    $track_db->save();
+                    // Count only the rows that were really written.
+                    $key_update++;
+                }
+                continue;
+            }
+
+            $bulk_track_insert[] = [
+                'title' => $title,
+                'slug' => null,
+                'userid' => 0,
+                'tag' => 'vmusic',
+                'genre' => 1,
+                'download_hash' => md5($id_nct),
+                'time' => time(),
+                'id_nct' => $id_nct,
+                'id_nct_str' => $id_nct_str,
+                'src_thumbnail_medium' => isset($tracks['img'][$key]) ? $tracks['img'][$key] : null,
+                'crawler_at' => $now,
+            ];
+        }
+
+        $key_insert = count($bulk_track_insert);
+        echo "\nSố bài hát insert: " . $key_insert;
+        echo "\nSố bài hát update: " . $key_update;
+
+        if ($key_insert > 0) {
+            DB::table(Constants::TABLE_TRACKS)->insert($bulk_track_insert);
+            echo "\nInsert thành công.";
+        } elseif ($key_update === 0) {
+            // Report "nothing fetched" only when the run wrote no row at all.
+            echo "\nKhông lấy được bài hát nào.";
+        }
+
+        // Record the run in the cron log.
+        Crontjob::create(['message' => 'NctCrawlerTrackCommand is called at ' . Carbon::now()]);
+    }
+}
diff --git a/app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php b/app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php
new file mode 100644
index 0000000..6b70b9d
--- /dev/null
+++ b/app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php
@@ -0,0 +1,55 @@
+<?php
+
+namespace App\Console\Commands\Nct;
+
+use App\Repositories\TrackRepository;
+use Illuminate\Console\Command;
+
+/**
+ * Placeholder for the command that will refresh NCT track links.
+ * handle() does nothing yet.
+ */
+class NctCrawlerUpdateTrackLinkCommand extends Command
+{
+    /**
+     * The name and signature of the console command.
+     *
+     * @var string
+     */
+    protected $signature = 'nct:update-track-link';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Cập nhật link bài hát từ NCT';
+
+    /**
+     * Repository injected for the future implementation; not used yet.
+     *
+     * @var TrackRepository
+     */
+    protected $trackRepository;
+
+    /**
+     * Create a new command instance.
+     *
+     * @param TrackRepository $trackRepository
+     */
+    public function __construct(TrackRepository $trackRepository)
+    {
+        parent::__construct();
+        $this->trackRepository = $trackRepository;
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * @return mixed
+     */
+    public function handle()
+    {
+        // Not implemented yet.
+    }
+}
diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php
index 344cf0d..80b95a4 100644
--- a/app/Console/Kernel.php
+++ b/app/Console/Kernel.php
@@ -3,6 +3,7 @@
 namespace App\Console;
 
 use App\Console\Commands\CrontjobCommand;
+use App\Console\Commands\Nct\NctCrawlerTrackCommand;
 use App\Console\Commands\Zing\ZingCrawlerNewRealeaseCommand;
 use App\Console\Commands\Zing\ZingCrawlerTrackCommand;
 use App\Console\Commands\Zing\ZingCrawlerTrackImageCommand;
@@ -21,6 +22,8 @@ class Kernel extends ConsoleKernel
         ZingCrawlerTrackCommand::class,
         ZingCrawlerNewRealeaseCommand::class,
         ZingCrawlerTrackImageCommand::class,
+        NctCrawlerTrackCommand::class,
+
     ];
 
 
diff --git a/app/Repositories/TrackRepository.php b/app/Repositories/TrackRepository.php
index 0bb658b..66f7be1 100644
--- a/app/Repositories/TrackRepository.php
+++ b/app/Repositories/TrackRepository.php
@@ -3,6 +3,8 @@
 namespace App\Repositories;
 
 use App\Models\Track;
+use Sunra\PhpSimple\HtmlDomParser;
+use Yangqi\Htmldom\Htmldom;
 
 class TrackRepository extends BaseRepository
 {
@@ -64,5 +66,106 @@ class TrackRepository extends BaseRepository
         return $query->get()->toArray();
     }
 
+    /**
+     * Scrape one NCT "Top 100" chart page into one list per field.
+     *
+     * Every key is always present, so callers can read it without isset():
+     * title, link, img, singer, url_profile, id_nct_str and id_nct.
+     * id_nct holds one entry per track box (null when no numeric ID was
+     * found) so that its indexes keep lining up with the other lists.
+     *
+     * NOTE(review): singer/url_profile get one entry per singer link, so they
+     * presumably do not line up with title for multi-singer tracks - confirm.
+     *
+     * @param string $url chart page URL
+     * @return array
+     */
+    public function processNctGetTrackInfoFromTop100($url)
+    {
+        $tracks = [
+            'title' => [],
+            'link' => [],
+            'img' => [],
+            'singer' => [],
+            'url_profile' => [],
+            'id_nct_str' => [],
+            'id_nct' => [],
+        ];
+
+        $dom = getDom($url);
+        if (!$dom) {
+            // NOTE(review): assumes getDom() is falsy when the page cannot be loaded - confirm.
+            return $tracks;
+        }
+
+        // Title and link sit on the same anchor; innertext is raw HTML, so decode entities.
+        foreach ($dom->find('div.box_info_field h3.h3 a') as $data) {
+            $tracks['title'][] = html_entity_decode(trim($data->innertext), ENT_QUOTES, 'UTF-8');
+            $tracks['link'][] = (string) $data->href;
+        }
+
+        // Thumbnail URL, read from the data-src attribute of the image.
+        foreach ($dom->find('div.box_info_field a img') as $data) {
+            $tracks['img'][] = (string) $data->{'data-src'};
+        }
+
+        // Singer name and profile URL sit on the same anchor.
+        foreach ($dom->find('div.box_info_field h4.list_name_singer a') as $data) {
+            $tracks['singer'][] = html_entity_decode(trim($data->innertext), ENT_QUOTES, 'UTF-8');
+            $tracks['url_profile'][] = (string) $data->href;
+        }
+
+        // String ID: the "add to playlist" button id without its prefix.
+        foreach ($dom->find('div.box_song_action a.button_add_playlist') as $data) {
+            $tracks['id_nct_str'][] = str_replace('btnShowBoxPlaylist_', '', (string) $data->id);
+        }
+
+        // Numeric ID: HTML comment markers are stripped first so a counter span inside a comment matches too.
+        $rule = '/<span class="export_listen" id="NCTCounter__(.*)">0<\/span>/msU';
+        foreach ($dom->find('div.box_info_field') as $data) {
+            $data_crawler = str_replace(['<!--', '-->'], '', (string) $data);
+            $tracks['id_nct'][] = preg_match($rule, $data_crawler, $result) === 1 ? $result[1] : null;
+        }
+
+        return $tracks;
+    }
+
+    /**
+     * Scrape an NCT playlist page into one list per field.
+     *
+     * The keys id_nct_str, link and id_nct are always present.
+     *
+     * @param string $url playlist page URL
+     * @return array
+     */
+    public function processNctGetTrackInfoFromTop100Playlist($url)
+    {
+        $tracks = ['id_nct_str' => [], 'link' => [], 'id_nct' => []];
+        $dom = new Htmldom($url);
+
+        // String ID: the "key" attribute of each list item that has one.
+        foreach ($dom->find('li') as $data) {
+            if (isset($data->key)) {
+                $tracks['id_nct_str'][] = (string) $data->key;
+            }
+        }
+
+        // Track link: anchors whose class is exactly "button_new_window".
+        foreach ($dom->find('li a') as $data) {
+            if (isset($data->class) && $data->class === 'button_new_window') {
+                $tracks['link'][] = (string) $data->href;
+            }
+        }
+
+        // Numeric ID: the listen-counter span id without its prefix.
+        foreach ($dom->find('li span') as $data) {
+            if (isset($data->id) && isset($data->class) && $data->class === 'icon_listen') {
+                $tracks['id_nct'][] = str_replace(['NCTCounter_sg_', 'NCTCounter_pn_'], '', $data->id);
+            }
+        }
+
+        return $tracks;
+    }
+
 
 }
\ No newline at end of file
diff --git a/composer.json b/composer.json
index fab9cd0..322d89c 100644
--- a/composer.json
+++ b/composer.json
@@ -6,8 +6,11 @@
     "type": "project",
     "require": {
         "php": ">=5.6.4",
+        "ixudra/curl": "^6.19",
         "laravel/framework": "5.4.*",
-        "laravel/tinker": "~1.0"
+        "laravel/tinker": "~1.0",
+        "sunra/php-simple-html-dom-parser": "^1.5",
+        "yangqi/htmldom": "^1.0"
     },
     "require-dev": {
         "fzaninotto/faker": "~1.4",
diff --git a/config/api.php b/config/api.php
index d46e5af..30e4cfe 100644
--- a/config/api.php
+++ b/config/api.php
@@ -62,7 +62,13 @@ return [
         'playlist' => 'https://zingmp3.vn/api/playlist/get-playlist-detail?id=ZWZB969E&ctime=1584213996&sig=e295c92bed3ce58a29a2fb8c69f9e1934bab807ae6f43c546eeb7ca4242be599171c9566f23296fdc137ee156fd307db4cfa1a4b528e5d99773065ee10c6a603&api_key=38e8643fb0dc04e8d65b99994d3dafff',
         'new_release' => 'https://zingmp3.vn/api/chart/get-chart-new-release?ctime=1584214467&sig=66218d539436572b65c74d0c852550f7c7df0a3128aca9b5be13aa3dcd1d7c0bf49e3af459f75db87e3cc08f60c087684a0c92375ab3a8a784253502bbdf065f&api_key=38e8643fb0dc04e8d65b99994d3dafff',
         'artist' => 'https://zingmp3.vn/api/artist/get-list?id=IWZ9Z08I&type=genre&sort=listen&start=1&count=1000&ctime=1584216597&sig=e2b8e1e48ccd12f560959913a6901fc76c9b93e1cafecba2d3517d8ab57eeb44717aa41eca70338bcf8bc8c84def1c43367ef44c887589db4ed81a1e2c042303&api_key=38e8643fb0dc04e8d65b99994d3dafff',
-    ]
+    ],
+
+    'nct' => [
+        'domain' => 'https://www.nhaccuatui.com/',
+        'top100' => 'https://www.nhaccuatui.com/top100/top-100-nhac-tre.m3liaiy6vVsF.html',
+        'playlist_top100' => 'https://www.nhaccuatui.com/playlist/top-100-nhac-tre-hay-nhat-va.m3liaiy6vVsF.html',
+    ],
 
 
 
diff --git a/config/app.php b/config/app.php
index 963ecb9..7e5c956 100644
--- a/config/app.php
+++ b/config/app.php
@@ -167,6 +167,7 @@ return [
          * Package Service Providers...
          */
         Laravel\Tinker\TinkerServiceProvider::class,
+        Yangqi\Htmldom\HtmldomServiceProvider::class,
 
         /*
          * Application Service Providers...
@@ -225,6 +226,7 @@ return [
         'URL' => Illuminate\Support\Facades\URL::class,
         'Validator' => Illuminate\Support\Facades\Validator::class,
         'View' => Illuminate\Support\Facades\View::class,
+        'Htmldom' => Yangqi\Htmldom\Htmldom::class,
 
     ],
 
-- 
libgit2 0.27.0