# Source code for ips_python.script

try:
    from ips_python.constants import DESCRIPTION_COLUMN_NAME, ORG_ID_COLUMN_NAME
    from ips_python.preprocessing import preprocess_query_text
    from ips_python.vectorize import vectorize_input_text
    from ips_python.cosine import get_cosine_similarity
    from ips_python.refinement import process_results, gather_top_results
    from ips_python.word2vecaverage import average_per_doc
except ModuleNotFoundError:
    from constants import DESCRIPTION_COLUMN_NAME, ORG_ID_COLUMN_NAME
    from preprocessing import preprocess_query_text
    from vectorize import vectorize_input_text
    from cosine import get_cosine_similarity
    from refinement import process_results, gather_top_results
    from word2vecaverage import average_per_doc


def download_data():
    """Placeholder for the step that procures the data.

    Does nothing yet; it exists to show that something has to be run in
    order to obtain the data.
    """
def _rank_against_corpus(
    query_vector,
    corpus_vectors,
    processed_iati_records,
    full_iati_records,
):
    """Score a query vector against the corpus and gather the top results.

    Shared tail of ``process_query`` and ``process_query_embeddings``: the
    similarity output is handed to ``process_results`` together with the
    full records, and that outcome is reduced by ``gather_top_results``.

    Args:
        query_vector: Vector representation of the query.
        corpus_vectors: Document representation the query is compared with.
        processed_iati_records: Passed through to ``get_cosine_similarity``.
        full_iati_records: Passed through to ``process_results``.

    Returns:
        Whatever ``gather_top_results`` returns.
    """
    df_result = get_cosine_similarity(
        query_vector, corpus_vectors, processed_iati_records
    )
    smart_results = process_results(df_result, full_iati_records)
    # NOTE(review): 3 is presumably the number of results kept per
    # ORG_ID_COLUMN_NAME value -- confirm against gather_top_results.
    return gather_top_results(smart_results, ORG_ID_COLUMN_NAME, 3)


def process_query(
    query_text,
    vectorizer,
    term_document_matrix,
    processed_iati_records,
    full_iati_records,
):
    """Return the top results for a query using the supplied vectorizer.

    The query text is run through ``preprocess_query_text``, turned into a
    vector with ``vectorize_input_text`` and compared against
    ``term_document_matrix`` via cosine similarity.

    Args:
        query_text: Raw query text to search for.
        vectorizer: Vectorizer handed to ``vectorize_input_text``;
            presumably the one that produced ``term_document_matrix``
            (TODO confirm with callers).
        term_document_matrix: Document representation the query vector is
            compared with.
        processed_iati_records: Passed through to ``get_cosine_similarity``.
        full_iati_records: Passed through to ``process_results``.

    Returns:
        Whatever ``gather_top_results`` returns.
    """
    processed_query_dataframe = preprocess_query_text(query_text)
    vectorized_query = vectorize_input_text(processed_query_dataframe, vectorizer)
    return _rank_against_corpus(
        vectorized_query,
        term_document_matrix,
        processed_iati_records,
        full_iati_records,
    )


def process_query_embeddings(
    query_text, w2v_model, w2v_avg, processed_iati_records, full_iati_records
):
    """Return the top results for a query using averaged word embeddings.

    The query text is run through ``preprocess_query_text``; the first
    entry of its ``DESCRIPTION_COLUMN_NAME`` column is averaged with
    ``average_per_doc`` and compared against ``w2v_avg`` via cosine
    similarity.

    Args:
        query_text: Raw query text to search for.
        w2v_model: Model handed to ``average_per_doc``.
        w2v_avg: Document representation the query average is compared
            with; presumably per-document embedding averages (TODO confirm).
        processed_iati_records: Passed through to ``get_cosine_similarity``.
        full_iati_records: Passed through to ``process_results``.

    Returns:
        Whatever ``gather_top_results`` returns.
    """
    processed_query_dataframe = preprocess_query_text(query_text)
    query_description = str(processed_query_dataframe[DESCRIPTION_COLUMN_NAME][0])
    # NOTE(review): 300 is presumably the embedding dimensionality of
    # w2v_model -- confirm against average_per_doc.
    # reshape(1, -1) turns the result into a single-row 2-D array before it
    # is passed to the similarity step.
    query_average = average_per_doc(query_description, w2v_model, 300).reshape(1, -1)
    return _rank_against_corpus(
        query_average, w2v_avg, processed_iati_records, full_iati_records
    )