Commit 49f2074b by Phạm Văn Đoan

cập nhật xử lý lấy bài hát từ NCT

parent ffd4b302
......@@ -42,12 +42,13 @@ class NctCrawlerTrackCommand extends Command
/**
* Execute the console command.
* Top 100 NCT: https://www.nhaccuatui.com/playlist/top-100-nhac-tre-hay-nhat-va.m3liaiy6vVsF.html
*
* @return mixed
*/
public function handle()
{
echo "\nBắt đầu xử lý lấy top 100 bài hát từ NCT.";
echo "\n Bắt đầu xử lý lấy top 100 bài hát từ NCT.";
// Lấy mảng ID của Zing những bài hát đã insert
$inserted = $this->trackRepository->getTrackInserted('title', 'original', []);
......@@ -58,64 +59,81 @@ class NctCrawlerTrackCommand extends Command
$url_playlist_top100 = config('api.nct.playlist_top100');
$tracks = $this->trackRepository->processNctGetTrackInfoFromTop100($url);
$tracks_playlist = $this->trackRepository->processNctGetTrackInfoFromTop100Playlist($url_playlist_top100);
//print_r($tracks); return false;
if (count($tracks['title']) == 0) return false;
if (count($tracks) == 0) return false;
$tracks_title = $tracks['title'];
$tracks_img = $tracks['img'];
$tracks_id_nct_str = $tracks['id_nct_str'];
$tracks_id_nct = $tracks['id_nct'];
$bulk_track_insert = [];
$bulk_track_update = [];
$key_insert = 0;
$key_update = 0;
foreach ($tracks_title as $key => $track) {
$bulk_track_insert = [];
foreach ($tracks as $key => $track) {
$link = $this->privateGeTrackLink($tracks_playlist, $track['id_nct']);
// Xử lý insert/update dựa vào tên bài hát
if (is_array($inserted) && count($inserted) > 0 && in_array($track, $inserted)) {
if (is_array($inserted) && count($inserted) > 0 && in_array($track['title'], $inserted)) {
// Cập nhật
$track_db = Track::where('title', $track)->take(1)->get();
$track_db = Track::where('title', $track['title'])->take(1)->get();
if (isset($track_db[0])) {
$track_db[0]->id_nct = $tracks_id_nct[$key];
$track_db[0]->id_nct_str = $tracks_id_nct_str[$key];
$track_db[0]->id_nct = $track['id_nct'];
$track_db[0]->id_nct_str = $track['id_nct_str'];
$track_db[0]->link_nct_top100 = $track['link'];
$track_db[0]->link_nct = $link;
$track_db[0]->crawler_at = Carbon::now();
$track_db[0]->save();
}
$key_update++;
} else {
$bulk_track_insert[$key_insert]['title'] = $track;
$bulk_track_insert[$key_insert]['title'] = $track['title'];
$bulk_track_insert[$key_insert]['slug'] = null;
$bulk_track_insert[$key_insert]['userid'] = 0;
$bulk_track_insert[$key_insert]['tag'] = 'vmusic';
$bulk_track_insert[$key_insert]['genre'] = 1;
$bulk_track_insert[$key_insert]['download_hash'] = md5($tracks_id_nct[$key]);
$bulk_track_insert[$key_insert]['download_hash'] = md5($track['id_nct']);
$bulk_track_insert[$key_insert]['time'] = time();
//$bulk_track_insert[$key_insert]['link'] = null;
$bulk_track_insert[$key_insert]['id_nct'] = $tracks_id_nct[$key];
$bulk_track_insert[$key_insert]['id_nct_str'] = $tracks_id_nct_str[$key];
//$bulk_track_insert[$key_insert]['singer_list'] = $singer_list;
//$bulk_track_insert[$key_insert]['track_duration'] = isset($track->duration) ? $track->duration : 0;
//$bulk_track_insert[$key_insert]['src_thumbnail'] = isset($track->thumbnail) ? $track->thumbnail : null;
$bulk_track_insert[$key_insert]['src_thumbnail_medium'] = $tracks_img[$key];
//$bulk_track_insert[$key_insert]['link_mv'] = null;
$bulk_track_insert[$key_insert]['id_nct'] = $track['id_nct'];
$bulk_track_insert[$key_insert]['id_nct_str'] = $track['id_nct_str'];
$bulk_track_insert[$key_insert]['src_thumbnail_medium'] = $track['img'];
$bulk_track_insert[$key_insert]['link_nct'] = $link;
$bulk_track_insert[$key_insert]['link_nct_top100'] = $track['link'];
$bulk_track_insert[$key_insert]['crawler_at'] = Carbon::now();
$key_insert++;
}
}
echo "\nSố bài hát insert: " . $key_insert;
echo "\nSố bài hát update: " . $key_update;
echo "\n Số bài hát insert: " . $key_insert;
echo "\n Số bài hát update: " . $key_update;
if (count($bulk_track_insert) > 0) {
if ($key_insert > 0) {
DB::table(Constants::TABLE_TRACKS)->insert($bulk_track_insert);
echo "\nInsert thành công.";
echo "\n Insert thành công.";
} else {
echo "\nKhông lấy được bài hát nào.";
echo "\n Không lấy được bài hát nào.";
}
// Ghi log
Crontjob::create(['message' => 'NctCrawlerTrackCommand is called at ' . Carbon::now()]);
}
/**
 * Look up the link of a track in the playlist data by its numeric NCT id.
 *
 * The id comparison is intentionally loose (==) so that an integer id and
 * its numeric-string form are treated as the same track.
 *
 * @param mixed $tracks_playlist list of playlist entries, each carrying 'id_nct' and 'link'
 * @param mixed $track_id        numeric NCT id of the track to look for
 * @return string|null the link of the first matching entry, or null when nothing matches
 */
private function privateGeTrackLink($tracks_playlist, $track_id)
{
    // Anything that is not an array cannot contain a match.
    if (!is_array($tracks_playlist)) {
        return null;
    }

    foreach ($tracks_playlist as $entry) {
        if ($entry['id_nct'] == $track_id) {
            return $entry['link'];
        }
    }

    return null;
}
}
......@@ -121,7 +121,17 @@ class TrackRepository extends BaseRepository
}
}
return $tracks;
/* Tổng hợp dữ liệu thành mảng */
$result = [];
for($i=0; $i < count($tracks['title']); $i++) {
$result[$i]['id_nct_str'] = $tracks['id_nct_str'][$i];
$result[$i]['id_nct'] = $tracks['id_nct'][$i];
$result[$i]['title'] = $tracks['title'][$i];
$result[$i]['link'] = $tracks['link'][$i];
$result[$i]['img'] = $tracks['img'][$i];
}
return $result;
}
public function processNctGetTrackInfoFromTop100Playlist($url)
......@@ -153,7 +163,29 @@ class TrackRepository extends BaseRepository
}
}
return $tracks;
/* Tổng hợp dữ liệu thành mảng */
$result = [];
for($i=0; $i < count($tracks['id_nct_str']); $i++) {
$result[$i]['id_nct_str'] = $tracks['id_nct_str'][$i];
$result[$i]['id_nct'] = $tracks['id_nct'][$i];
$result[$i]['link'] = $tracks['link'][$i];
}
return $result;
}
/**
 * Get the tracks that still have no artwork after being crawled from NCT.
 *
 * Returns every track whose `art` column is NULL while
 * `src_thumbnail_medium` is set, limited to the id/link/thumbnail columns
 * listed below.
 *
 * @return array
 */
public function getNctTrackLinkEmpty()
{
    $columns = ['id', 'link_nct', 'id_zing', 'id_nct', 'id_keeng', 'src_thumbnail_medium'];

    return Track::select($columns)
        ->whereNull('art')
        ->whereNotNull('src_thumbnail_medium')
        ->get()
        ->toArray();
}
......
......@@ -10,7 +10,7 @@
"laravel/framework": "5.4.*",
"laravel/tinker": "~1.0",
"sunra/php-simple-html-dom-parser": "^1.5",
"yangqi/htmldom": "^1.0"
"yangqi/htmldom":"@dev"
},
"require-dev": {
"fzaninotto/faker": "~1.4",
......@@ -53,5 +53,8 @@
"preferred-install": "dist",
"sort-packages": true,
"optimize-autoloader": true
}
},
"repositories": [
{"type": "path", "url": "packages/yangqi/htmldom"}
]
}
/vendor
composer.phar
composer.lock
.DS_Store
\ No newline at end of file
The MIT License (MIT)
Copyright (c) 2013 yangqi
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Htmldom
=======
An Htmldom package for Laravel 4 and 5, based on Simple HTML DOM Parser.
## Installation
Add the following line to the `require` section of `composer.json`:
```json
{
"require": {
"yangqi/htmldom": "dev-master"
}
}
```
## Laravel 5 Setup (same as Laravel 4)
1. Add the service provider to `config/app.php`.
```php
'providers' => array(
...
'Yangqi\Htmldom\HtmldomServiceProvider',
...
```
2. Add the alias to `config/app.php`.
```php
'aliases' => array(
...
'Htmldom' => 'Yangqi\Htmldom\Htmldom',
...
```
## Usage
1. Use it as follows:
```php
$html = new \Htmldom('http://www.example.com');
// Find all images
foreach($html->find('img') as $element)
echo $element->src . '<br>';
// Find all links
foreach($html->find('a') as $element)
echo $element->href . '<br>';
```
See the detailed documentation at http://simplehtmldom.sourceforge.net/manual.htm
{
"name": "yangqi/htmldom",
"type": "library",
"description": "Simple Html Dom Parser for Laravel 4",
"keywords": ["laravel", "html parser"],
"license": "MIT",
"authors": [
{
"name": "yangqi",
"email": "i@yangqi.me"
}
],
"require": {
"php": ">=5.3.0"
},
"autoload": {
"psr-0": {
"Yangqi\\Htmldom": "src/"
}
},
"minimum-stability": "dev"
}
\ No newline at end of file
<?php namespace Yangqi\Htmldom;
use Illuminate\Support\ServiceProvider;
class HtmldomServiceProvider extends ServiceProvider {

    /**
     * Indicates if loading of the provider is deferred.
     *
     * Because loading is deferred, the container only loads this provider
     * when one of the services listed by provides() is requested.
     *
     * @var bool
     */
    protected $defer = true;

    /**
     * Bootstrap the application events.
     *
     * @return void
     */
    public function boot()
    {
        // package() is a Laravel 4 helper that was removed in Laravel 5.
        // Only call it when the base ServiceProvider really defines it, so
        // the provider can boot on both framework generations.
        if (method_exists($this, 'package'))
        {
            $this->package('yangqi/htmldom');
        }
    }

    /**
     * Register the service provider.
     *
     * Binds the 'htmldom' key to a factory that returns a new Htmldom
     * instance on every resolution.
     *
     * @return void
     */
    public function register()
    {
        $this->app->bind('htmldom', function()
        {
            return new Htmldom;
        });
    }

    /**
     * Get the services provided by the provider.
     *
     * A deferred provider must list its bindings here; with an empty list
     * the 'htmldom' binding registered above could never be resolved.
     *
     * @return array
     */
    public function provides()
    {
        return array('htmldom');
    }

}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment