| | import os |
| | import json |
| |
|
# Restrict this process to the GPU with index 1. The assignment is placed
# above the later `import torch` so the variable is already set when the
# deep-learning stack is loaded.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})
| |
|
| |
|
def read_json(file_path):
    """Parse the UTF-8 encoded JSON file at *file_path* and return the result."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)
| |
|
def write_json(file_path, data):
    """Write *data* to *file_path* as indented (4 spaces) UTF-8 JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).

    The document is serialised to a sibling ``<file_path>.tmp`` file first and
    only moved over *file_path* with ``os.replace`` once serialisation has
    succeeded. If ``json.dump`` fails, or the process dies mid-write, the
    previous content of *file_path* is therefore left untouched instead of
    being truncated.

    Args:
        file_path: Destination path (``str``, ``bytes`` or ``os.PathLike``).
        data: Any JSON-serialisable object.

    Raises:
        TypeError, ValueError: If *data* cannot be serialised by ``json``.
        OSError: If the temporary file cannot be written or moved into place.
    """
    target = os.fspath(file_path)
    tmp_path = target + (b'.tmp' if isinstance(target, bytes) else '.tmp')
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=4)
        os.replace(tmp_path, target)
    except BaseException:
        # Drop the partial temp file; the existing target has not been touched.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
| |
|
| |
|
| | import os |
| | from openai import OpenAI |
| | import pprint |
| | import json |
| | from llamaapi import LlamaAPI |
| |
|
| | |
# API credentials are taken from the environment rather than embedded in the
# source, so they never end up in version control or shared notebooks.
# NOTE: the keys that used to be written here literally must be treated as
# leaked and should be revoked/rotated with the respective providers.
_llama_api_key = os.environ.get("LLAMA_API_KEY")
if not _llama_api_key:
    raise RuntimeError(
        "LLAMA_API_KEY is not set; export it before running this script."
    )
llama = LlamaAPI(_llama_api_key)

# OPENAI_API_KEY is no longer assigned here; it is expected to be exported by
# the caller. Only warn, because nothing below in the visible script creates
# an OpenAI client.
if not os.environ.get("OPENAI_API_KEY"):
    print("Warning: OPENAI_API_KEY is not set in the environment.")
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| |
|
| |
|
| | from chat import MiniCPMVChat, img2base64 |
| | import torch |
| | import json |
| | from PIL import Image |
| |
|
| |
|
# Seed torch's RNG with a fixed value, then load the MiniCPM-V chat model
# from its local checkpoint directory.
torch.manual_seed(0)
chat_model = MiniCPMVChat('/code/ICLR_2024/Model/MiniCPM-Llama3-V-2_5')

# One-off query against a single screenshot: ask the model to enumerate the
# interactable applications it sees. The reply is kept in `answer`.
qs = """
List all the application name and location in the image that can be interacted with, the result shoudl be like a list
"""
msgs = [{"role": "user", "content": qs}]

image_path = '/code/ICLR_2024/SeeClick/output_image_27.png'
im_64 = img2base64(image_path)

# The chat wrapper is given the encoded image plus the JSON-serialised
# message list.
inputs = {"image": im_64, "question": json.dumps(msgs)}
answer = chat_model.chat(inputs)
| |
|
# Load the dataset that the annotation loop will extend in place.
data = read_json("/code/ICLR_2024/Auto-GUI/dataset/blip/single_blip_train_llava_10000_caption_elements_llama3_70b.json")

# Relative image path -> position of that record inside `data`.
retrival_dict = {record['image']: position for position, record in enumerate(data)}

path = '/code/ICLR_2024/Auto-GUI/dataset/'
path2 = 'blip/single_texts_splits/'

# File stem of every image: the third '/'-separated component of the stored
# path, cut at the first '.'.
image_id = [x['image'].split('/')[2].split('.')[0] for x in data]

# Group stems of the form "<key>_<value>" by key, collecting the values in
# dataset order. A stem without '_' raises IndexError, as it did before.
all_pair_id = {}
for stem in image_id:
    parts = stem.split('_')
    all_pair_id.setdefault(parts[0], []).append(parts[1])

# Unique keys in first-seen order. Deduplicating through set() would give an
# ordering that changes from one interpreter run to the next (string hashes
# are randomised), which makes any positional slice of this list select a
# different subset on every run. Dict keys keep insertion order.
all_pair_key = list(all_pair_id)
| |
|
| |
|
import ast
import time

from tqdm import tqdm

# Position in all_pair_key at which processing starts; raise it to resume a
# partially completed run, set it to 0 to process everything.
START_INDEX = 770

# Question put to the vision model for every screenshot.
Demo_prompt_step1 = """
List all the application name and location in the image that can be interacted with, the result shoudl be like a list
"""

# Instruction appended to the vision model's reply when asking the text model
# to reformat it as a Python list.
prompt = """ ##### refine it to a list, list name must be elements , just like:
elements = [
"Newegg",
"Newegg CEO",
"Newegg customer service",
"Newegg founder",
"Newegg promo code",
"Newegg return policy",
"Newegg revenue",
"Newegg military discounts"]

Answer the python list only!
##### """


def _parse_elements(text):
    """Return the Python list literal embedded in *text*.

    The span from the first '[' to the last ']' is evaluated with
    ``ast.literal_eval``, so only literals are accepted and nothing in the
    model's reply is ever executed. Surrounding text such as an
    ``elements =`` prefix or markdown fences is ignored.

    Raises:
        ValueError: If no bracketed span exists or it is not a list.
        SyntaxError: If the bracketed span is not a valid literal.
    """
    start = text.find('[')
    end = text.rfind(']')
    if start == -1 or end < start:
        raise ValueError("no list literal found in the response")
    parsed = ast.literal_eval(text[start:end + 1])
    if not isinstance(parsed, list):
        raise ValueError("response literal is not a list")
    return parsed


# The question payload is identical for every image, so build it once.
msgs = [{"role": "user", "content": Demo_prompt_step1}]
question_payload = json.dumps(msgs)

for pair_key in tqdm(all_pair_key[START_INDEX:]):
    for pair_value in all_pair_id[pair_key]:
        # Rebuild the relative image path used as key in retrival_dict.
        lookup_path = path2 + pair_key + '_' + pair_value + '.png'
        ids = retrival_dict[lookup_path]
        image_path = path + data[ids]['image']

        # Step 1: ask the vision model what can be interacted with.
        im_64 = img2base64(image_path)
        inputs = {"image": im_64, "question": question_payload}
        answer = chat_model.chat(inputs)

        data[ids]['icon_list_raw'] = answer
        pprint.pprint(answer)

        # Short pause between consecutive calls to the remote API.
        time.sleep(2)

        # Step 2: have the text model rewrite the raw answer as a Python list.
        api_request_json = {
            "model": "llama3-70b",
            "messages": [
                {"role": "system", "content": "You are a assistant that will handle the corresponding text formatting for me."},
                {"role": "user", "content": answer + prompt},
            ],
            "max_tokens": 1024
        }

        try:
            response = llama.run(api_request_json)
            new_answer = response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Remote-call boundary: report, back off, move on to the next image.
            print(f"Error in LLAMA API Generation : {e}")
            time.sleep(30)
            continue

        print('======================================================')
        pprint.pprint(new_answer)
        print('======================================================')

        # SECURITY: the reply used to be run through exec(). That executes
        # arbitrary model output, and whenever the output failed to bind
        # `elements` the list left over from the previous image was stored
        # for this one. The reply is now parsed as a literal instead.
        try:
            data[ids]['icon_list'] = _parse_elements(new_answer)
        except (ValueError, SyntaxError, TypeError) as e:
            print(f"Error in setting data[ids]['icon_list']: {e}")
            continue

    # Persist progress after each key.
    # NOTE(review): the nesting level of this call in the original could not
    # be determined; saving here yields the same final file as saving once at
    # the very end, and keeps finished work if the run is interrupted.
    write_json('/code/ICLR_2024/Auto-GUI/dataset/blip/single_blip_train_llava_10000_caption_elements_llama3_70b.json', data)
| |
|
| | |
| |
|