final-draft
This commit is contained in:
parent
456315a6f5
commit
f133567e76
Binary file not shown.
After Width: | Height: | Size: 749 KiB |
117
index.html
117
index.html
|
@ -115,6 +115,7 @@
|
||||||
- Access API
|
- Access API
|
||||||
- Channel upload playlist
|
- Channel upload playlist
|
||||||
- Video statistics
|
- Video statistics
|
||||||
|
- `pandas` dataframe
|
||||||
</section>
|
</section>
|
||||||
<section data-markdown>
|
<section data-markdown>
|
||||||
### 4. Get YouTube video statistics
|
### 4. Get YouTube video statistics
|
||||||
|
@ -132,40 +133,44 @@
|
||||||
```
|
```
|
||||||
|
|
||||||
</section>
|
</section>
|
||||||
<section>
|
<section data-markdown>
|
||||||
<pre><code data-line-numbers="3|5-16|17-18|18-29|30-32"># tubestats/youtube_api.py
|
```python [|3|5-16|17-18|20-29|30-32]
|
||||||
|
# tubestats/youtube_api.py
|
||||||
|
|
||||||
upload_playlist_ID = channel_data['upload_playlist_ID']
|
upload_playlist_ID = channel_data['upload_playlist_ID']
|
||||||
|
|
||||||
video_response = []
|
video_response = []
|
||||||
next_page_token = None
|
next_page_token = None
|
||||||
while True:
|
while True:
|
||||||
# obtaining video ID + titles
|
# obtaining video ID + titles
|
||||||
playlist_request = self.youtube.playlistItems().list(
|
playlist_request = self.youtube.playlistItems().list(
|
||||||
part='snippet,contentDetails',
|
part='snippet,contentDetails',
|
||||||
maxResults=50, # API Limit is 50
|
maxResults=50, # API Limit is 50
|
||||||
pageToken=next_page_token,
|
pageToken=next_page_token,
|
||||||
playlistId=upload_playlist_ID,
|
playlistId=upload_playlist_ID,
|
||||||
)
|
)
|
||||||
playlist_response = playlist_request.execute()
|
playlist_response = playlist_request.execute()
|
||||||
# isolating video ID
|
# isolating video ID
|
||||||
vid_subset = [ vid_ID['contentDetails']['videoId']
|
vid_subset = [ vid_ID['contentDetails']['videoId']
|
||||||
for vid_ID in playlist_response['items'] ]
|
for vid_ID in playlist_response['items'] ]
|
||||||
# retrieving video statistics
|
# retrieving video statistics
|
||||||
vid_info_subset_request = self.youtube.videos().list(
|
vid_info_subset_request = self.youtube.videos().list(
|
||||||
part='snippet,contentDetails,statistics',
|
part='snippet,contentDetails,statistics',
|
||||||
id=vid_subset
|
id=vid_subset
|
||||||
)
|
)
|
||||||
vid_info_subset_response = vid_info_subset_request.execute()
|
vid_info_subset_response = vid_info_subset_request.execute()
|
||||||
video_response.append(vid_info_subset_response)
|
video_response.append(vid_info_subset_response)
|
||||||
# obtaining page token
|
# obtaining page token
|
||||||
next_page_token = playlist_response.get('nextPageToken') # get method used because token may not exist
|
next_page_token = playlist_response.get('nextPageToken') # get method used because token may not exist
|
||||||
if next_page_token is None:
|
if next_page_token is None:
|
||||||
break
|
break
|
||||||
|
|
||||||
df = pd.json_normalize(video_response, 'items')
|
df = pd.json_normalize(video_response, 'items')
|
||||||
return df
|
return df
|
||||||
</code></pre>
|
</section>
|
||||||
|
<section data-markdown>
|
||||||
|
### Video statistics
|
||||||
|
![](dataframe.png)
|
||||||
</section>
|
</section>
|
||||||
<section data-markdown>
|
<section data-markdown>
|
||||||
## How does TubeStats work?
|
## How does TubeStats work?
|
||||||
|
@ -202,7 +207,7 @@ return df
|
||||||
</section>
|
</section>
|
||||||
<section data-markdown>
|
<section data-markdown>
|
||||||
## 6. Testing
|
## 6. Testing
|
||||||
```python [|16-20]
|
```python [|15-20]
|
||||||
# tests/tests_youtube_api.py
|
# tests/tests_youtube_api.py
|
||||||
from tubestats.youtube_api import create_api, YouTubeAPI
|
from tubestats.youtube_api import create_api, YouTubeAPI
|
||||||
from tests.test_settings import set_channel_ID_test_case
|
from tests.test_settings import set_channel_ID_test_case
|
||||||
|
@ -344,27 +349,43 @@ return df
|
||||||
</section>
|
</section>
|
||||||
<section data-markdown>
|
<section data-markdown>
|
||||||
## Some things I would like to discuss
|
## Some things I would like to discuss
|
||||||
|
- DataFrame and memory
|
||||||
|
- Error handling
|
||||||
|
- Async?
|
||||||
</section>
|
</section>
|
||||||
<section data-markdown>
|
<section data-markdown>
|
||||||
df = self.df
|
### DataFrame immutability and memory?
|
||||||
df = df[['snippet.publishedAt',
|
```python []
|
||||||
'snippet.title',
|
df = self.df
|
||||||
...
|
df = df[['snippet.publishedAt',
|
||||||
'statistics.favoriteCount',
|
'snippet.title',
|
||||||
'statistics.commentCount']]
|
...
|
||||||
|
'statistics.favoriteCount',
|
||||||
df = df.fillna(0)
|
'statistics.commentCount']]
|
||||||
|
|
||||||
# changing dtypes
|
df = df.fillna(0)
|
||||||
df = df.astype({'statistics.viewCount': 'int',
|
|
||||||
...
|
|
||||||
'statistics.commentCount': 'int',})
|
|
||||||
# applying natural log to view count as data is tail heavy
|
|
||||||
df['statistics.viewCount_NLOG'] = df['statistics.viewCount'].apply(lambda x : np.log(x))
|
|
||||||
|
|
||||||
df = df.sort_values(by='snippet.publishedAt_REFORMATED', ascending=True)
|
df = df.astype({'statistics.viewCount': 'int',
|
||||||
return DataFrame)
|
...
|
||||||
|
'statistics.commentCount': 'int',})
|
||||||
|
df['statistics.viewCount_NLOG'] = df['statistics.viewCount'].apply(lambda x : np.log(x))
|
||||||
|
|
||||||
|
df = df.sort_values(by='snippet.publishedAt_REFORMATED', ascending=True)
|
||||||
</section>
|
</section>
|
||||||
|
<section data-markdown>
|
||||||
|
## What did I learn
|
||||||
|
- Project based learning
|
||||||
|
- 'minimum viable product'
|
||||||
|
</section>
|
||||||
|
<section data-markdown>
|
||||||
|
## Conclusion
|
||||||
|
- Analysing consistency
|
||||||
|
- YouTube Data API --> Heroku
|
||||||
|
- Share your work!
|
||||||
|
</section>
|
||||||
|
<section data-markdown>
|
||||||
|
## Acknowledgements
|
||||||
|
- Menno
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
Loading…
Reference in New Issue