From 09338dae7663264afdc930a023d960ed2fa6abfe Mon Sep 17 00:00:00 2001
From: doanpv <doanpv@dcv.vn>
Date: Thu, 26 Mar 2020 08:46:20 +0700
Subject: [PATCH] tạo task lấy bài hát từ NCT

---
 app/Console/Commands/Nct/NctCrawlerTrackCommand.php           | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php |  47 +++++++++++++++++++++++++++++++++++++++++++++++
 app/Console/Kernel.php                                        |   3 +++
 app/Repositories/TrackRepository.php                          |  92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 composer.json                                                 |   5 ++++-
 config/api.php                                                |   8 +++++++-
 config/app.php                                                |   2 ++
 7 files changed, 276 insertions(+), 2 deletions(-)
 create mode 100644 app/Console/Commands/Nct/NctCrawlerTrackCommand.php
 create mode 100644 app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php

diff --git a/app/Console/Commands/Nct/NctCrawlerTrackCommand.php b/app/Console/Commands/Nct/NctCrawlerTrackCommand.php
new file mode 100644
index 0000000..2d77ddf
--- /dev/null
+++ b/app/Console/Commands/Nct/NctCrawlerTrackCommand.php
@@ -0,0 +1,121 @@
+<?php
+
+namespace App\Console\Commands\Nct;
+
+use App\Helpers\Constants;
+use App\Models\Crontjob;
+use App\Models\Track;
+use App\Repositories\TrackRepository;
+use Carbon\Carbon;
+use Illuminate\Console\Command;
+use Illuminate\Support\Facades\DB;
+
+class NctCrawlerTrackCommand extends Command
+{
+    /**
+     * The name and signature of the console command.
+     *
+     * @var string
+     */
+    protected $signature = 'nct:get-track';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Lấy thông tin bài hát từ NCT';
+
+    /** @var TrackRepository Scrapes the chart and reads the stored track titles. */
+    protected $trackRepository;
+
+    /**
+     * Create a new command instance.
+     *
+     * @param TrackRepository $trackRepository
+     */
+    public function __construct(TrackRepository $trackRepository)
+    {
+        parent::__construct();
+        $this->trackRepository = $trackRepository;
+    }
+
+    /**
+     * Import the NCT top-100 chart: refresh the NCT ids of tracks whose title is
+     * already stored and bulk-insert the remaining ones.
+     *
+     * @return mixed false when the chart yielded no titles, null otherwise
+     */
+    public function handle()
+    {
+        echo "\nBắt đầu xử lý lấy top 100 bài hát từ NCT.";
+
+        // Titles already stored; they decide between update and insert below.
+        $inserted = $this->trackRepository->getTrackInserted('title', 'original', []);
+
+        // Scrape the chart page configured under api.nct.top100.
+        $tracks = $this->trackRepository->processNctGetTrackInfoFromTop100(config('api.nct.top100'));
+
+        // A scrape that matched nothing has no 'title' key; count() on it would fail.
+        if (empty($tracks['title'])) return false;
+
+        $bulk_track_insert = [];
+        $key_update = 0;
+        $key_skip = 0;
+        foreach ($tracks['title'] as $key => $track) {
+            // Columns are matched by position; skip rows whose ids are missing
+            // instead of failing halfway through on an undefined offset.
+            if (!isset($tracks['id_nct'][$key], $tracks['id_nct_str'][$key])) {
+                $key_skip++;
+                continue;
+            }
+
+            $id_nct = $tracks['id_nct'][$key];
+            $id_nct_str = $tracks['id_nct_str'][$key];
+
+            // Insert or update is decided by the title; strict so '1e3' never equals '1000'.
+            if (is_array($inserted) && in_array($track, $inserted, true)) {
+                $track_db = Track::where('title', $track)->first();
+                if ($track_db !== null) {
+                    $track_db->id_nct = $id_nct;
+                    $track_db->id_nct_str = $id_nct_str;
+                    $track_db->crawler_at = Carbon::now();
+                    $track_db->save();
+                    // Count only rows that were actually written.
+                    $key_update++;
+                }
+                continue;
+            }
+
+            // Columns link, singer_list, track_duration, src_thumbnail and link_mv
+            // are not populated by this command.
+            $bulk_track_insert[] = [
+                'title' => $track,
+                'slug' => null,
+                'userid' => 0,
+                'tag' => 'vmusic',
+                'genre' => 1,
+                'download_hash' => md5($id_nct),
+                'time' => time(),
+                'id_nct' => $id_nct,
+                'id_nct_str' => $id_nct_str,
+                'src_thumbnail_medium' => isset($tracks['img'][$key]) ? $tracks['img'][$key] : null,
+                'crawler_at' => Carbon::now(),
+            ];
+        }
+
+        echo "\nSố bài hát insert: " . count($bulk_track_insert);
+        echo "\nSố bài hát update: " . $key_update;
+        echo "\nSố bài hát bỏ qua: " . $key_skip;
+
+        if (count($bulk_track_insert) > 0) {
+            DB::table(Constants::TABLE_TRACKS)->insert($bulk_track_insert);
+            echo "\nInsert thành công.";
+        } else {
+            echo "\nKhông lấy được bài hát nào.";
+        }
+
+        // Log the run.
+        Crontjob::create(['message' => 'NctCrawlerTrackCommand is called at ' . Carbon::now()]);
+    }
+}
diff --git a/app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php b/app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php
new file mode 100644
index 0000000..6b70b9d
--- /dev/null
+++ b/app/Console/Commands/Nct/NctCrawlerUpdateTrackLinkCommand.php
@@ -0,0 +1,47 @@
+<?php
+
+namespace App\Console\Commands\Nct;
+
+use App\Repositories\TrackRepository;
+use Illuminate\Console\Command;
+
+class NctCrawlerUpdateTrackLinkCommand extends Command
+{
+    /**
+     * The name and signature of the console command.
+     *
+     * @var string
+     */
+    protected $signature = 'nct:update-track-link';
+
+    /**
+     * The console command description.
+     *
+     * @var string
+     */
+    protected $description = 'Cập nhật link bài hát từ NCT';
+
+    protected $trackRepository; // TrackRepository injected through the constructor
+
+    /**
+     * Create a new command instance.
+     *
+     * NctCrawlerUpdateTrackLinkCommand constructor.
+     * @param TrackRepository $trackRepository
+     */
+    public function __construct(TrackRepository $trackRepository)
+    {
+        parent::__construct();
+        $this->trackRepository = $trackRepository;
+    }
+
+    /**
+     * Execute the console command. Currently a stub with an empty body.
+     *
+     * @return mixed
+     */
+    public function handle()
+    {
+        // Not implemented yet.
+    }
+}
diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php
index 344cf0d..80b95a4 100644
--- a/app/Console/Kernel.php
+++ b/app/Console/Kernel.php
@@ -3,6 +3,7 @@
 namespace App\Console;
 
 use App\Console\Commands\CrontjobCommand;
+use App\Console\Commands\Nct\NctCrawlerTrackCommand;
 use App\Console\Commands\Zing\ZingCrawlerNewRealeaseCommand;
 use App\Console\Commands\Zing\ZingCrawlerTrackCommand;
 use App\Console\Commands\Zing\ZingCrawlerTrackImageCommand;
@@ -21,6 +22,8 @@ class Kernel extends ConsoleKernel
         ZingCrawlerTrackCommand::class,
         ZingCrawlerNewRealeaseCommand::class,
         ZingCrawlerTrackImageCommand::class,
+        NctCrawlerTrackCommand::class,
+
 
     ];
 
diff --git a/app/Repositories/TrackRepository.php b/app/Repositories/TrackRepository.php
index 0bb658b..66f7be1 100644
--- a/app/Repositories/TrackRepository.php
+++ b/app/Repositories/TrackRepository.php
@@ -3,6 +3,8 @@
 namespace App\Repositories;
 
 use App\Models\Track;
+use Sunra\PhpSimple\HtmlDomParser;
+use Yangqi\Htmldom\Htmldom;
 
 class TrackRepository extends BaseRepository
 {
@@ -64,5 +66,95 @@ class TrackRepository extends BaseRepository
         return $query->get()->toArray();
     }
 
+    /**
+     * Scrape the per-track columns of an NCT top-100 chart page.
+     *
+     * Callers pair the title, img, id_nct_str and id_nct lists by index, so these
+     * must stay positionally aligned. A key is absent when its selector matched nothing.
+     * The title/link and singer/url_profile pairs are each filled in a single pass.
+     *
+     * @param string $url chart page address handed to getDom()
+     * @return array lists keyed title, link, img, singer, url_profile, id_nct_str and id_nct;
+     *               id_nct holds null for rows without a counter span
+     */
+    public function processNctGetTrackInfoFromTop100($url)
+    {
+        $tracks = [];
+        $dom = getDom($url);
+
+        // Markup fragments removed from every scraped value.
+        $strip = ['<h2 class="card-header">', '</h2>'];
+
+        /* Track title and track link: both come from the same anchor. */
+        foreach ($dom->find('div.box_info_field h3.h3 a') as $data) {
+            $tracks['title'][] = str_replace($strip, '', $data->innertext);
+            $tracks['link'][] = str_replace($strip, '', $data->href);
+        }
+
+        /* Track image address, read from the data-src attribute. */
+        foreach ($dom->find('div.box_info_field a img') as $data) {
+            $tracks['img'][] = str_replace($strip, '', $data->{"data-src"});
+        }
+
+        /* Singer name and singer profile address: both come from the same anchor. */
+        foreach ($dom->find('div.box_info_field h4.list_name_singer a') as $data) {
+            $tracks['singer'][] = str_replace($strip, '', $data->innertext);
+            $tracks['url_profile'][] = str_replace($strip, '', $data->href);
+        }
+
+        /* String id of the track: the playlist button's element id without its prefix. */
+        foreach ($dom->find('div.box_song_action a.button_add_playlist') as $data) {
+            $tracks['id_nct_str'][] = str_replace(['btnShowBoxPlaylist_', '</h2>'], '', $data->id);
+        }
+
+        /* Numeric id of the track: the suffix of the export_listen span's id. */
+        $start = '<span class=\"export_listen\" id="NCTCounter__';
+        $end = '">0<\/span>';
+        $rule = "/$start(.*)$end/msU";
+        foreach ($dom->find('div.box_info_field') as $data) {
+            // Comment markers are dropped first; presumably the span is served inside
+            // an HTML comment - confirm against the live page.
+            $data_crawler = str_replace(['<!--', '-->'], '', $data);
+            // Append on every row (null when nothing matched): skipping a row would
+            // shift all later ids onto the wrong titles.
+            $matched = preg_match($rule, $data_crawler, $result) === 1;
+            $tracks['id_nct'][] = $matched ? $result[1] : null;
+        }
+
+        return $tracks;
+    }
+
+    /**
+     * Collect track ids and links from an NCT playlist page.
+     *
+     * @param string $url location handed to Htmldom
+     * @return array lists keyed id_nct_str, link and id_nct; a key is absent when nothing matched
+     */
+    public function processNctGetTrackInfoFromTop100Playlist($url)
+    {
+        $dom = new Htmldom($url);
+        $result = [];
+
+        // String id: the "key" attribute of each <li> that has one.
+        foreach ($dom->find('li') as $item) {
+            if (!isset($item->key)) continue;
+            $result['id_nct_str'][] = str_replace([], [], $item->key);
+        }
+
+        // Track link: href of anchors whose class equals "button_new_window".
+        foreach ($dom->find('li a') as $anchor) {
+            $matches = isset($anchor->class) && $anchor->class == 'button_new_window';
+            if ($matches) $result['link'][] = str_replace([], [], $anchor->href);
+        }
+
+        // Numeric id: the id of "icon_listen" spans with its NCTCounter prefix removed.
+        foreach ($dom->find('li span') as $span) {
+            if (!isset($span->id) || !isset($span->class) || $span->class != 'icon_listen') continue;
+            $result['id_nct'][] = str_replace(['NCTCounter_sg_', 'NCTCounter_pn_'], ['', ''], $span->id);
+        }
+
+        return $result;
+    }
+
 
 }
\ No newline at end of file
diff --git a/composer.json b/composer.json
index fab9cd0..322d89c 100644
--- a/composer.json
+++ b/composer.json
@@ -6,8 +6,11 @@
     "type": "project",
     "require": {
         "php": ">=5.6.4",
+        "ixudra/curl": "^6.19",
         "laravel/framework": "5.4.*",
-        "laravel/tinker": "~1.0"
+        "laravel/tinker": "~1.0",
+        "sunra/php-simple-html-dom-parser": "^1.5",
+        "yangqi/htmldom": "^1.0"
     },
     "require-dev": {
         "fzaninotto/faker": "~1.4",
diff --git a/config/api.php b/config/api.php
index d46e5af..30e4cfe 100644
--- a/config/api.php
+++ b/config/api.php
@@ -62,7 +62,13 @@ return [
         'playlist' => 'https://zingmp3.vn/api/playlist/get-playlist-detail?id=ZWZB969E&ctime=1584213996&sig=e295c92bed3ce58a29a2fb8c69f9e1934bab807ae6f43c546eeb7ca4242be599171c9566f23296fdc137ee156fd307db4cfa1a4b528e5d99773065ee10c6a603&api_key=38e8643fb0dc04e8d65b99994d3dafff',
         'new_release' => 'https://zingmp3.vn/api/chart/get-chart-new-release?ctime=1584214467&sig=66218d539436572b65c74d0c852550f7c7df0a3128aca9b5be13aa3dcd1d7c0bf49e3af459f75db87e3cc08f60c087684a0c92375ab3a8a784253502bbdf065f&api_key=38e8643fb0dc04e8d65b99994d3dafff',
         'artist' => 'https://zingmp3.vn/api/artist/get-list?id=IWZ9Z08I&type=genre&sort=listen&start=1&count=1000&ctime=1584216597&sig=e2b8e1e48ccd12f560959913a6901fc76c9b93e1cafecba2d3517d8ab57eeb44717aa41eca70338bcf8bc8c84def1c43367ef44c887589db4ed81a1e2c042303&api_key=38e8643fb0dc04e8d65b99994d3dafff',
-    ]
+    ],
+
+    'nct' => [
+        'domain' => 'https://www.nhaccuatui.com/',
+        'top100' => 'https://www.nhaccuatui.com/top100/top-100-nhac-tre.m3liaiy6vVsF.html',
+        'playlist_top100' => 'https://www.nhaccuatui.com/playlist/top-100-nhac-tre-hay-nhat-va.m3liaiy6vVsF.html'
+    ],
 
 
 
diff --git a/config/app.php b/config/app.php
index 963ecb9..7e5c956 100644
--- a/config/app.php
+++ b/config/app.php
@@ -167,6 +167,7 @@ return [
          * Package Service Providers...
          */
         Laravel\Tinker\TinkerServiceProvider::class,
+        Yangqi\Htmldom\HtmldomServiceProvider::class,
 
         /*
          * Application Service Providers...
@@ -225,6 +226,7 @@ return [
         'URL' => Illuminate\Support\Facades\URL::class,
         'Validator' => Illuminate\Support\Facades\Validator::class,
         'View' => Illuminate\Support\Facades\View::class,
+        'Htmldom' => Yangqi\Htmldom\Htmldom::class,
 
     ],
 
--
libgit2 0.27.0