Commit edac321e by Phạm Văn Đoan

tạo task lấy bài hát từ nhac.vn

parent 9de1bf88
...@@ -79,7 +79,6 @@ class NctCrawlerTrackCommand extends Command ...@@ -79,7 +79,6 @@ class NctCrawlerTrackCommand extends Command
$track_db[0]->track_duration = $duration; $track_db[0]->track_duration = $duration;
$track_db[0]->id_nct = $track['id_nct']; $track_db[0]->id_nct = $track['id_nct'];
$track_db[0]->id_nct_str = $track['id_nct_str']; $track_db[0]->id_nct_str = $track['id_nct_str'];
$track_db[0]->link_nct_top100 = $track['link'];
$track_db[0]->link_nct = $link; $track_db[0]->link_nct = $link;
$track_db[0]->crawler_at = Carbon::now(); $track_db[0]->crawler_at = Carbon::now();
$track_db[0]->save(); $track_db[0]->save();
...@@ -98,7 +97,6 @@ class NctCrawlerTrackCommand extends Command ...@@ -98,7 +97,6 @@ class NctCrawlerTrackCommand extends Command
$bulk_track_insert[$key_insert]['id_nct_str'] = $track['id_nct_str']; $bulk_track_insert[$key_insert]['id_nct_str'] = $track['id_nct_str'];
$bulk_track_insert[$key_insert]['src_thumbnail_medium'] = $track['img']; $bulk_track_insert[$key_insert]['src_thumbnail_medium'] = $track['img'];
$bulk_track_insert[$key_insert]['link_nct'] = $link; $bulk_track_insert[$key_insert]['link_nct'] = $link;
$bulk_track_insert[$key_insert]['link_nct_top100'] = $track['link'];
$bulk_track_insert[$key_insert]['crawler_at'] = Carbon::now(); $bulk_track_insert[$key_insert]['crawler_at'] = Carbon::now();
$key_insert++; $key_insert++;
......
...@@ -2,8 +2,13 @@ ...@@ -2,8 +2,13 @@
namespace App\Console\Commands\NhacVn; namespace App\Console\Commands\NhacVn;
use App\Helpers\Constants;
use App\Models\Crontjob;
use App\Models\Track;
use App\Repositories\TrackRepository; use App\Repositories\TrackRepository;
use Carbon\Carbon;
use Illuminate\Console\Command; use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
class NhacVnCrawlerTrackCommand extends Command class NhacVnCrawlerTrackCommand extends Command
{ {
...@@ -26,7 +31,7 @@ class NhacVnCrawlerTrackCommand extends Command ...@@ -26,7 +31,7 @@ class NhacVnCrawlerTrackCommand extends Command
/** /**
* Create a new command instance. * Create a new command instance.
* *
* NctCrawlerTrackCommand constructor. * NhacVnCrawlerTrackCommand constructor.
* @param TrackRepository $trackRepository * @param TrackRepository $trackRepository
*/ */
public function __construct(TrackRepository $trackRepository) public function __construct(TrackRepository $trackRepository)
...@@ -42,6 +47,65 @@ class NhacVnCrawlerTrackCommand extends Command ...@@ -42,6 +47,65 @@ class NhacVnCrawlerTrackCommand extends Command
*/ */
public function handle()
{
    // Crawls the nhac.vn top-20 ranking and syncs it into the tracks table:
    // tracks whose title is already stored are updated in place, all other
    // tracks are collected and bulk-inserted in a single query.

    // Values already stored for the 'title' column; they are compared against
    // the scraped titles below to choose between update and insert.
    // NOTE(review): expected to be a flat list of title strings - confirm
    // against TrackRepository::getTrackInserted().
    $inserted = $this->trackRepository->getTrackInserted('title', 'original', []);
    if (!is_array($inserted)) {
        $inserted = [];
    }

    // Fetch and parse the ranking page.
    $url = config('api.nhac_vn.ranking20');
    $tracks = $this->trackRepository->processNhacVnGetTrackInfoFromTop20($url);
    if (empty($tracks)) {
        return false;
    }

    $key_update = 0;
    $bulk_track_insert = [];

    foreach ($tracks as $track) {
        // Strict comparison: with a loose in_array() two different
        // numeric-looking titles (e.g. "007" and "7") would be treated as equal.
        if (in_array($track['title'], $inserted, true)) {
            // Known title: refresh the nhac.vn specific columns.
            $existing = Track::where('title', $track['title'])->first();
            if ($existing !== null) {
                $existing->id_nhacvn = $track['id_nhacvn'];
                $existing->link_nhacvn = $track['link'];
                $existing->singer_list = $track['singer'];
                $existing->crawler_at = Carbon::now();
                $existing->save();
            }
            // Counted even when the row could not be loaded, as before.
            $key_update++;
            continue;
        }

        // Unknown title: queue a new row for the bulk insert.
        $bulk_track_insert[] = [
            'title' => $track['title'],
            'slug' => null,
            'userid' => 0,
            'tag' => 'vmusic',
            'genre' => 1,
            'download_hash' => md5($track['id_nhacvn']),
            'time' => time(),
            'id_nhacvn' => $track['id_nhacvn'],
            'src_thumbnail_medium' => $track['img'],
            'link_nhacvn' => $track['link'],
            'singer_list' => $track['singer'],
            'crawler_at' => Carbon::now(),
        ];
    }

    $key_insert = count($bulk_track_insert);

    echo "\n Số bài hát insert: " . $key_insert;
    echo "\n Số bài hát update: " . $key_update;

    if ($key_insert > 0) {
        DB::table(Constants::TABLE_TRACKS)->insert($bulk_track_insert);
        echo "\n Insert thành công.";
    } else {
        // NOTE(review): this branch is also reached when every scraped track
        // was an update, so the "no track retrieved" wording can be misleading.
        echo "\n Không lấy được bài hát nào.";
    }

    // Record that the command ran.
    Crontjob::create(['message' => 'NhacVnCrawlerTrackCommand is called at ' . Carbon::now()]);
}
} }
...@@ -221,5 +221,63 @@ class TrackRepository extends BaseRepository ...@@ -221,5 +221,63 @@ class TrackRepository extends BaseRepository
return $query->get()->toArray(); return $query->get()->toArray();
} }
/**
 * Scrape the nhac.vn top-20 ranking page and return one entry per track.
 *
 * Each column is read from its own CSS selector and the columns are then
 * combined by position, so entry N is built from the N-th match of every
 * selector.
 *
 * @param string $url URL of the ranking page to load.
 * @return array List of arrays with the keys id_nhacvn, title, img, link and
 *               singer. A value is null when its selector produced fewer
 *               matches than there are titles. Empty when the page could not
 *               be loaded or contains no title nodes.
 */
public function processNhacVnGetTrackInfoFromTop20($url)
{
    $dom = getDom($url);
    if (!$dom) {
        // NOTE(review): assumes getDom() returns a falsy value on failure - confirm.
        return [];
    }

    // Track IDs, taken from the id attribute of the "add song" anchors.
    $ids = [];
    foreach ($dom->find('ul li a.addsongtmpl') as $node) {
        $ids[] = (string) $node->id;
    }

    // Title and link come from the same anchor, so read both in one pass.
    $titles = [];
    $links = [];
    foreach ($dom->find('h3.over-text a') as $node) {
        $titles[] = (string) $node->innertext;
        $links[] = (string) $node->href;
    }

    // Thumbnail URL and singer name come from the same <img> (src and alt).
    $images = [];
    $singers = [];
    foreach ($dom->find('ul.bxh_song_list li a img') as $node) {
        $images[] = (string) $node->src;
        $singers[] = (string) $node->alt;
    }

    // Combine the columns by position; the title list drives the length.
    $result = [];
    foreach ($titles as $i => $title) {
        $result[] = [
            'id_nhacvn' => isset($ids[$i]) ? $ids[$i] : null,
            'title' => $title,
            'img' => isset($images[$i]) ? $images[$i] : null,
            'link' => $links[$i],
            'singer' => isset($singers[$i]) ? $singers[$i] : null,
        ];
    }

    return $result;
}
} }
\ No newline at end of file
...@@ -10,6 +10,10 @@ return [ ...@@ -10,6 +10,10 @@ return [
| Here you may specify which of the database connections below you wish | Here you may specify which of the database connections below you wish
| to use as your default connection for all database work. Of course | to use as your default connection for all database work. Of course
| you may use many connections at once using the Database library. | you may use many connections at once using the Database library.
|--------------------------------------------------------------------------
| nhac.vn:
| - BXH hiện tại: https://nhac.vn/bang-xep-hang-bai-hat-viet-nam-bxdE
| - BXH tuần khác: https://nhac.vn/bang-xep-hang-bai-hat-viet-nam-bxdE/tuan-13
| |
*/ */
...@@ -70,6 +74,11 @@ return [ ...@@ -70,6 +74,11 @@ return [
'playlist_top100' => 'https://www.nhaccuatui.com/playlist/top-100-nhac-tre-hay-nhat-va.m3liaiy6vVsF.html' 'playlist_top100' => 'https://www.nhaccuatui.com/playlist/top-100-nhac-tre-hay-nhat-va.m3liaiy6vVsF.html'
], ],
'nhac_vn' => [
'domain' => 'https://nhac.vn/',
'ranking20' => 'https://nhac.vn/bang-xep-hang-bai-hat-viet-nam-bxdE'
],
]; ];
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment