diff --git a/dataframe.png b/dataframe.png
new file mode 100644
index 0000000..1ff81b9
Binary files /dev/null and b/dataframe.png differ
diff --git a/index.html b/index.html
index 0a6cea6..ab3ac60 100644
--- a/index.html
+++ b/index.html
@@ -115,6 +115,7 @@
 - Access API
 - Channel upload playlist
 - Video statistics
+ - `pandas` dataframe
 
 ### 4. Get YouTube video statistics
@@ -132,40 +133,44 @@ ```
-
-# tubestates/youtube_api.py
+
+ ```python [|3|5-16|17-18|20-29|30-32]
+ # tubestates/youtube_api.py
+
-upload_playlist_ID = channel_data['upload_playlist_ID']
+ upload_playlist_ID = channel_data['upload_playlist_ID']
+
-video_response = []
-next_page_token = None
-while True:
-    # obtaining video ID + titles
-    playlist_request = self.youtube.playlistItems().list(
-        part='snippet,contentDetails',
-        maxResults=50, # API Limit is 50
-        pageToken=next_page_token,
-        playlistId=upload_playlist_ID,
-    )
-    playlist_response = playlist_request.execute()
-    # isolating video ID
-    vid_subset = [ vid_ID['contentDetails']['videoId']
-                   for vid_ID in playlist_response['items'] ]
-    # retrieving video statistics
-    vid_info_subset_request = self.youtube.videos().list(
-        part='snippet,contentDetails,statistics',
-        id=vid_subset
-    )
-    vid_info_subset_response = vid_info_subset_request.execute()
-    video_response.append(vid_info_subset_response)
-    # obtaining page token
-    next_page_token = playlist_response.get('nextPageToken') # get method used because token may not exist
-    if next_page_token is None:
-        break
+ video_response = []
+ next_page_token = None
+ while True:
+     # obtaining video ID + titles
+     playlist_request = self.youtube.playlistItems().list(
+         part='snippet,contentDetails',
+         maxResults=50, # API Limit is 50
+         pageToken=next_page_token,
+         playlistId=upload_playlist_ID,
+     )
+     playlist_response = playlist_request.execute()
+     # isolating video ID
+     vid_subset = [ vid_ID['contentDetails']['videoId']
+                    for vid_ID in playlist_response['items'] ]
+     # retrieving video statistics
+     vid_info_subset_request = self.youtube.videos().list(
+         part='snippet,contentDetails,statistics',
+         id=vid_subset
+     )
+     vid_info_subset_response = vid_info_subset_request.execute()
+     video_response.append(vid_info_subset_response)
+     # obtaining page token
+     next_page_token = playlist_response.get('nextPageToken') # get method used because token may not exist
+     if next_page_token is None:
+         break
-df = pd.json_normalize(video_response, 'items')
-return df
-```
+
+ df = pd.json_normalize(video_response, 'items')
+ return df
+ ```
+
+ ### Video statistics
+ ![](dataframe.png)
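As context for the slide added above: `pd.json_normalize(video_response, 'items')` treats `'items'` as the record path, so every element of every page's `items` list becomes one row, and nested dicts are flattened into dot-separated columns. A minimal, self-contained sketch; the payload is hypothetical but mirrors the shape of a `videos().list` response:

```python
import pandas as pd

# Hypothetical sample: two pages of videos().list responses, as collected
# by the pagination loop in the hunk above.
video_response = [
    {'items': [{'id': 'abc123',
                'snippet': {'title': 'First video',
                            'publishedAt': '2021-01-01T00:00:00Z'},
                'statistics': {'viewCount': '1000', 'commentCount': '7'}}]},
    {'items': [{'id': 'def456',
                'snippet': {'title': 'Second video',
                            'publishedAt': '2021-02-01T00:00:00Z'},
                'statistics': {'viewCount': '250', 'commentCount': '3'}}]},
]

# 'items' is the record path: one row per video, with nested dicts
# flattened into dot-separated column names.
df = pd.json_normalize(video_response, 'items')
print(df.columns.tolist())
# ['id', 'snippet.title', 'snippet.publishedAt',
#  'statistics.viewCount', 'statistics.commentCount']
```

Note that the YouTube API returns the counts as strings, which is why a later cleaning step in this diff casts them with `astype`.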
 
 ## How does TubeStats work?
@@ -202,7 +207,7 @@ return df
 ## 6. Testing
-```python [|16-20]
+```python [|15-20]
 # tests/tests_youtube_api.py
 from tubestats.youtube_api import create_api, YouTubeAPI
 from tests.test_settings import set_channel_ID_test_case
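The hunk above only shifts the highlight range; the test bodies themselves sit outside the diff context. Purely as an illustration, a pytest-style test for the wrapper could look like the sketch below. The fixture, the `YouTubeAPI(channel_ID)` constructor, and the `get_video_data()` method are assumptions; only the imports come from the diff:

```python
import pytest

from tubestats.youtube_api import YouTubeAPI
from tests.test_settings import set_channel_ID_test_case

@pytest.fixture
def yt() -> YouTubeAPI:
    # hypothetical: build the wrapper around a known, stable test channel
    return YouTubeAPI(set_channel_ID_test_case())

def test_video_data_is_nonempty(yt):
    # hypothetical method name, standing in for whatever returns the
    # normalized DataFrame of video statistics
    df = yt.get_video_data()
    assert not df.empty
    assert 'statistics.viewCount' in df.columns
```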
@@ -344,27 +349,43 @@ return df
 ## Some things I would like to discuss
+ - DataFrame and memory
+ - Error handling
+ - Async?
-df = self.df
-df = df[['snippet.publishedAt',
-         'snippet.title',
-         ...
-         'statistics.favoriteCount',
-         'statistics.commentCount']]
-
-df = df.fillna(0)
+ ### DataFrame immutability and memory?
+ ```python []
+ df = self.df
+ df = df[['snippet.publishedAt',
+          'snippet.title',
+          ...
+          'statistics.favoriteCount',
+          'statistics.commentCount']]
-# changing dtypes
-df = df.astype({'statistics.viewCount': 'int',
-                ...
-                'statistics.commentCount': 'int',})
-# applying natural log to view count as data is tail heavy
-df['statistics.viewCount_NLOG'] = df['statistics.viewCount'].apply(lambda x : np.log(x))
+
+ df = df.fillna(0)
-df = df.sort_values(by='snippet.publishedAt_REFORMATED', ascending=True)
-return DataFrame)
+
+ df = df.astype({'statistics.viewCount': 'int',
+                 ...
+                 'statistics.commentCount': 'int',})
+ df['statistics.viewCount_NLOG'] = df['statistics.viewCount'].apply(lambda x : np.log(x))
+
+ df = df.sort_values(by='snippet.publishedAt_REFORMATED', ascending=True)
+ ```
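On the 'DataFrame and memory' bullet above: every `df = ...` step returns a brand-new DataFrame and rebinds the name, so nothing is mutated in place and roughly two copies are alive at any moment. One common alternative is to write the pipeline as a single chain, so no intermediate stays bound to a name. A minimal sketch, not the TubeStats implementation; the helper name and the trimmed column list are illustrative:

```python
import numpy as np
import pandas as pd

def clean_video_data(df: pd.DataFrame) -> pd.DataFrame:
    # illustrative subset of the columns selected in the slide above
    cols = ['snippet.publishedAt', 'snippet.title',
            'statistics.viewCount', 'statistics.commentCount']
    return (df[cols]
            .fillna(0)
            .astype({'statistics.viewCount': 'int',
                     'statistics.commentCount': 'int'})
            # natural log of view count, as the data is tail heavy;
            # note np.log(0) yields -inf, so np.log1p would be safer
            .assign(**{'statistics.viewCount_NLOG':
                       lambda d: np.log(d['statistics.viewCount'])})
            .sort_values(by='snippet.publishedAt', ascending=True))
```

Chaining does not make pandas operate in place; each step still allocates a new frame, but intermediates become garbage immediately, keeping peak memory close to the reassignment version while avoiding accidental reuse of stale frames.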
+
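On the 'Error handling' bullet: both `execute()` calls in the pagination hunk can raise `googleapiclient.errors.HttpError`, for example on quota exhaustion or a transient 5xx. A hedged sketch of one conventional approach, a retry wrapper with exponential backoff; the helper is hypothetical and not part of TubeStats, while `HttpError` and `resp.status` are the real google-api-python-client API:

```python
import time

from googleapiclient.errors import HttpError

def execute_with_retry(request, retries: int = 3, backoff: float = 1.0):
    """Hypothetical helper: run request.execute(), retrying transient errors."""
    for attempt in range(retries):
        try:
            return request.execute()
        except HttpError as err:
            if err.resp.status == 403:
                raise  # typically quota exceeded; retrying will not help
            if attempt == retries - 1:
                raise
            time.sleep(backoff * 2 ** attempt)  # exponential backoff

# usage in the pagination loop:
#   playlist_response = execute_with_retry(playlist_request)
```

As for the 'Async?' bullet: google-api-python-client is a blocking library, so the usual route is to wrap `execute()` calls with `asyncio.to_thread` rather than look for a native async client.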
+ ## What did I learn
+ - Project-based learning
+ - 'minimum viable product'
+
+
+ ## Conclusion
+ - Analysing consistency
+ - YouTube Data API --> Heroku
+ - Share your work!
+
+
+ ## Acknowledgements
+ - Menno