Commit 043c934f by Paktalin

finished cleaning the dataframe

parent 8890361f
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -46,10 +46,13 @@ def clean_dataframe():
df.columns = ['distance', 'noun_like', 'noun_like_form', 'noun_like_pos', 'sentence', 'verb', 'verbs_form']
df = df[pd.notnull(df['noun_like_form'])] # remove examples with null forms
df = df[~df['noun_like_form'].str.contains('\|')] # remove example with several forms
df = df[~df['sentence'].str.contains('A post shared by')]
df = df[df['noun_like_form'] != '?'] # remove examples with unknown forms
save_csv(df, 'cleaned_dataframe.csv', sep='~')
print(df[df['distance'] > 100]['sentence'])
plt.scatter(df['distance'], df['noun_like_form'], alpha=0.2, c='k')
plt.scatter(df['distance'], df['noun_like_pos'], alpha=0.05, c='green')
plt.show()
# df = verbs_dict_to_df()
# save_csv(df, 'verbs_with_noun_likes.csv', sep='~')
clean_dataframe()
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment