From e82f3f9ac845ff1c32a462ef056fd2a06f005708 Mon Sep 17 00:00:00 2001 From: WHoutstanding Date: Thu, 12 Feb 2026 23:10:32 +0800 Subject: [PATCH 1/3] convert sample from torch to paddle --- tools/torch_to_paddle/convert.py | 242 ++++++++++++++++++ tools/torch_to_paddle/torch_to_paddle_test.sh | 14 + 2 files changed, 256 insertions(+) create mode 100644 tools/torch_to_paddle/convert.py create mode 100755 tools/torch_to_paddle/torch_to_paddle_test.sh diff --git a/tools/torch_to_paddle/convert.py b/tools/torch_to_paddle/convert.py new file mode 100644 index 000000000..c537bad11 --- /dev/null +++ b/tools/torch_to_paddle/convert.py @@ -0,0 +1,242 @@ +import argparse +import subprocess +import os +import re +import json +import shutil +from pathlib import Path + + +def _get_model_path_list(args): + # Get a list of model path from args. + assert args.model_path_list is not None + with open(args.model_path_list) as f: + yield from ( + clean_line + for line in f + for clean_line in [line.strip()] + if len(clean_line) > 0 + if not clean_line.startswith("#") + ) + +def remove_string_from_model(input_file, target_string): + # Delete a fixed string from model.py. + if not os.path.exists(input_file): + print(f"Error: Not found {input_file}") + return + + try: + with open(input_file, 'r', encoding='utf-8') as f: + content = f.read() + + if target_string not in content: + return + + new_content = content.replace(target_string, "") + + with open(input_file, 'w', encoding='utf-8') as f: + f.write(new_content) + + except Exception as e: + print(f"Error: {e}") + + +def run_paconvert(input_file, output_file, output_log): + # Run padconvet to convert model.py from torch to paddle. 
+ Path(output_log).parent.mkdir(parents=True, exist_ok=True) + + command = [ + "paconvert", + "-i", input_file, + "-o", output_file, + "--log_dir", output_log, + "--show_unsupport_api" + ] + + try: + subprocess.run(command, check=True) + print("Convert successfully") + except subprocess.CalledProcessError as e: + print(f"Convert failed: {e}") + except FileNotFoundError: + print("Error: The paconvert command could not be found. Please ensure that the tool is installed.") + + +def convert_model_py(model_path, output_dir): + # Convert model.py from torch to paddle. + input_model_py = os.path.join(model_path, "model.py") + output_model_py = os.path.join(output_dir, "model.py") + output_log = os.path.join(output_dir, "log.log") + run_paconvert(input_model_py, output_model_py, output_log) + remove_string_from_model(output_model_py, ">>>>>>") + +def convert_weight_meta_py(model_path, output_dir): + # Convert weight_meta.py from torch to paddle. + input_file = os.path.join(model_path, 'weight_meta.py') + output_file = os.path.join(output_dir, 'weight_meta.py') + + if not os.path.exists(input_file): + print(f"[Error] Not found: {input_file}") + return + + pattern = r"(dtype\s*=\s*['\"])torch(?=.*['\"])" + replacement = r"\1paddle" + + try: + with open(input_file, 'r', encoding='utf-8') as f: + content = f.read() + + new_content = re.sub(pattern, replacement, content) + + with open(output_file, 'w', encoding='utf-8') as f: + f.write(new_content) + + except Exception as e: + print(f"Error: {e}") + + +def convert_graph_net_json(model_path, output_dir): + # Convert graph_net.json from torch to paddle. 
def convert_weight_meta_py(model_path, output_dir):
    """Copy weight_meta.py, rewriting dtype strings from torch.* to paddle.*."""
    input_file = os.path.join(model_path, "weight_meta.py")
    output_file = os.path.join(output_dir, "weight_meta.py")

    if not os.path.exists(input_file):
        print(f"[Error] Not found: {input_file}")
        return

    # Only rewrite the framework prefix inside dtype string literals,
    # e.g. dtype="torch.float32" -> dtype="paddle.float32".
    pattern = r"(dtype\s*=\s*['\"])torch(?=.*['\"])"
    replacement = r"\1paddle"

    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(re.sub(pattern, replacement, content))
    except OSError as e:  # narrowed from a blanket `except Exception`
        print(f"Error: {e}")


def convert_graph_net_json(model_path, output_dir):
    """Copy graph_net.json, flipping its "framework" field to paddle."""
    input_file = os.path.join(model_path, "graph_net.json")
    output_file = os.path.join(output_dir, "graph_net.json")

    if not os.path.exists(input_file):
        print(f"Error: {input_file} not found.")
        return

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    if data.get("framework") == "torch":
        data["framework"] = "paddle"

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)


def copy_sample_files(model_path, output_dir, files_copied):
    """Copy the listed sample files verbatim into the output directory."""
    for fname in files_copied:
        shutil.copy(os.path.join(model_path, fname), os.path.join(output_dir, fname))


def convert_sample_from_torch_to_paddle(model_path, output_dir):
    """Convert one full sample directory (code, metadata, json) to paddle."""
    files_copied = ["input_meta.py", "input_tensor_constraints.py", "graph_hash.txt"]
    convert_model_py(model_path, output_dir)
    convert_weight_meta_py(model_path, output_dir)
    convert_graph_net_json(model_path, output_dir)
    copy_sample_files(model_path, output_dir, files_copied)


def get_api_convert_rate(log_path):
    """Return the 'Convert Rate is: X%' percentage from the log, as a string.

    Returns None when the log is missing or contains no rate line.
    """
    try:
        with open(log_path, "r", encoding="utf-8") as f:
            for line in f:
                match = re.search(r"Convert Rate is:\s*(\d+\.?\d*)%", line)
                if match:
                    return match.group(1)
    except FileNotFoundError:
        print(f"Not found: {log_path}")


def get_api_unsupported(log_path):
    """Return the torch.* API names the log reports as unsupported."""
    api_unsupported_list = []
    try:
        with open(log_path, "r", encoding="utf-8") as f:
            for line in f:
                parts = line.strip().split()
                # Report lines look like "torch.some.api <count>", so at
                # least two whitespace-separated tokens are required.
                if len(parts) >= 2 and parts[0].startswith("torch."):
                    api_unsupported_list.append(parts[0])
    except FileNotFoundError:
        print(f"Not found: {log_path}")
    return api_unsupported_list


def save_result_to_json(rel_model_path, result, result_file):
    """Record one sample's [rate, unsupported-list] under its path key.

    Fix: also tolerate a result_file that does not exist yet — the original
    caught only JSON decode errors and crashed with FileNotFoundError.
    """
    try:
        with open(result_file, "r", encoding="utf-8") as json_f:
            all_data = json.load(json_f)
    except (FileNotFoundError, json.JSONDecodeError, ValueError):
        all_data = {}

    all_data[rel_model_path] = {
        "api_convert_rate": result[0],
        "api_unsupported_list": result[1],
    }

    with open(result_file, "w", encoding="utf-8") as json_f:
        json.dump(all_data, json_f, indent=4, ensure_ascii=False)


def convert_log_process(rel_model_path, output_dir, result_file):
    """Parse a sample's paconvert log and persist rate + unsupported APIs."""
    log_path = os.path.join(output_dir, "log.log")
    result = [get_api_convert_rate(log_path), get_api_unsupported(log_path)]
    save_result_to_json(rel_model_path, result, result_file)


def main(args):
    """Convert every listed sample from torch to paddle and record results."""
    for model_path in _get_model_path_list(args):
        abs_model_path = os.path.join(args.model_path_prefix, model_path)
        # Mirror the sample tree under output_dir, rooted below ".../samples/".
        abs_output_dir = os.path.join(
            args.output_dir, model_path.split("samples/", 1)[-1]
        )
        convert_sample_from_torch_to_paddle(abs_model_path, abs_output_dir)
        convert_log_process(model_path, abs_output_dir, args.result_file)


if __name__ == "__main__":
    # Fix: description previously read "Test compiler performance.", copied
    # from another tool.
    parser = argparse.ArgumentParser(
        description="Convert samples from torch to paddle."
    )
    parser.add_argument(
        "--model-path-list",
        type=str,
        default=None,
        help="Path of file containing model paths.",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        help="Output directory of samples from torch to paddle.",
    )
    parser.add_argument(
        "--model-path-prefix",
        type=str,
        default=None,
        help="Path prefix of samples in list of model path.",
    )
    parser.add_argument(
        "--result-file",
        type=str,
        default=None,
        help="Result of convert samples from torch to paddle.",
    )
    main(args=parser.parse_args())
#!/usr/bin/env bash
# Convert the listed torch samples to paddle and record per-sample results.
# (Patch 2 in this series only re-formats convert.py; the script below is the
# cleaned patch-1 driver. Fixes: removed the unused log_dir variable and the
# missing newline at end of file.)

graph_net=$(python3 -c "import graph_net; import os; print(
os.path.dirname(graph_net.__file__))")
GraphNet="$graph_net/.."
output_dir="$GraphNet/torch_to_paddle_samples"
mkdir -p "$output_dir"
result_file="$output_dir/result.json"
# Pre-create the result file so the converter can append per-sample entries.
touch "$result_file"

python3 -m tools.torch_to_paddle.convert \
    --model-path-prefix "$GraphNet" \
    --model-path-list "graph_net/config/small100_torch_samples_list.txt" \
    --output-dir "$output_dir" \
    --result-file "$result_file"
def get_api_convert_rate(log_path):
    """Scan a paconvert log and return the reported convert rate string.

    Returns None when the log is missing or holds no rate line.
    """
    try:
        with open(log_path, "r", encoding="utf-8") as handle:
            for text in handle:
                found = re.search(r"Convert Rate is:\s*(\d+\.?\d*)%", text)
                if found:
                    return found.group(1)
    except FileNotFoundError:
        print(f"Not found: {log_path}")


def get_api_unsupported(log_path):
    """Collect the torch.* API names the log reports as unsupported."""
    try:
        with open(log_path, "r", encoding="utf-8") as handle:
            rows = [text.strip() for text in handle]
    except FileNotFoundError:
        print(f"Not found: {log_path}")
        return []
    # Report rows look like "torch.some.api <count>"; keep only the API name.
    # (Fix: dropped the redundant parentheses around the appended name.)
    return [
        row.split()[0]
        for row in rows
        if row.startswith("torch.") and len(row.split()) >= 2
    ]


def save_result_to_json(rel_model_path, result, result_file):
    """Record one sample's [rate, unsupported-apis] pair under its path key.

    Fix: a missing result_file is now treated like an empty one instead of
    raising FileNotFoundError (only decode errors were caught before).
    """
    try:
        with open(result_file, "r", encoding="utf-8") as json_f:
            all_data = json.load(json_f)
    except (FileNotFoundError, json.JSONDecodeError, ValueError):
        all_data = {}

    all_data[rel_model_path] = {
        "api_convert_rate": result[0],
        "api_unsupported_list": result[1],
    }

    with open(result_file, "w", encoding="utf-8") as json_f:
        json.dump(all_data, json_f, indent=4, ensure_ascii=False)


def convert_log_process(rel_model_path, output_dir, result_file):
    """Extract rate + unsupported-API list from the sample log and persist."""
    log_path = os.path.join(output_dir, "log.log")
    summary = [get_api_convert_rate(log_path), get_api_unsupported(log_path)]
    save_result_to_json(rel_model_path, summary, result_file)
def main(args):
    """Convert every sample in the model-path list from torch to paddle.

    For each sample, in order: run paconvert once to obtain a conversion log,
    record the still-unsupported APIs, rewrite unstable torch APIs to their
    stable forms, convert the sample files proper, and finally store the API
    convert rate. (Fix: dropped the dead trailing `return`.)
    """
    model_path_prefix = args.model_path_prefix
    model_path_list = list(get_model_path_list(args.model_path_list))
    output_dir = args.output_dir
    log_dir = args.log_dir
    # The shared summary.json lives next to the converted samples.
    summary_dir = output_dir

    for model_path in model_path_list:
        abs_model_path = os.path.join(model_path_prefix, model_path)
        # Strip everything up to ".../samples/" so the output tree stays flat.
        abs_output_dir = os.path.join(
            output_dir, model_path.split("samples/", 1)[-1]
        )
        abs_log_dir = os.path.join(log_dir, model_path)
        get_convert_log(abs_model_path, abs_log_dir)
        filter_and_save_unconverted_api(model_path, abs_log_dir, summary_dir)
        convert_api_from_unstable_to_stable(
            model_path, abs_model_path, abs_output_dir, summary_dir
        )
        convert_sample_from_torch_to_paddle(
            abs_model_path, abs_output_dir, abs_log_dir
        )
        save_sample_api_convert_rate(model_path, abs_log_dir, summary_dir)


if __name__ == "__main__":
    # `required=False` is argparse's default and was dropped as redundant.
    parser = argparse.ArgumentParser(
        description="Convert samples from torch to paddle."
    )
    parser.add_argument(
        "--model-path-list",
        type=str,
        default=None,
        help="Path of file containing model paths.",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        help="Output directory of samples from torch to paddle.",
    )
    parser.add_argument(
        "--model-path-prefix",
        type=str,
        default=None,
        help="Path prefix of samples in list of model path.",
    )
    parser.add_argument(
        "--log-dir",
        type=str,
        default=None,
        help="Log directory of convert samples from torch to paddle.",
    )
    args = parser.parse_args()
    main(args=args)
# Mapping from an unstable torch C-binding API to the UnstableToStableBackend
# method that rewrites it into its stable public equivalent.
API_UNSTABLE_TO_STABLE = {
    "torch._C._nn.one_hot": "_impl_unstable_to_stable_one_hot",
    "torch._C._linalg.linalg_vector_norm": "_impl_unstable_to_stable_linalg_vector_norm",
    "torch._C._fft.fft_irfft": "_impl_unstable_to_stable_irfft",
    "torch._C._special.special_logit": "_impl_unstable_to_stable_special_logit",
    "torch._C._fft.fft_rfft": "_impl_unstable_to_stable_rfft",
    "torch._C._nn.pad": "_impl_unstable_to_stable_pad",
    "torch._C._nn.gelu": "_impl_unstable_to_stable_gelu",
    "torch._C._nn.softplus": "_impl_unstable_to_stable_softplus",
    "torch._C._nn.scaled_dot_product_attention": "_impl_unstable_to_stable_sdpa",
    "torch._C._linalg.linalg_norm": "_impl_unstable_to_stable_linalg_norm",
    "torch._C._nn.linear": "_impl_unstable_to_stable_linear_to_functional_linear",
    "torch._C._set_grad_enabled": "_impl_unstable_to_stable_set_grad_enabled",
    "torch._C._nn.avg_pool2d": "_impl_unstable_to_stable_avg_pool2d",
    "torch._C._fft.fft_fftn": "_impl_unstable_to_stable_fftn",
}


def get_gm_from_model_path(model_path):
    """Load a sample and return its fx GraphModule with shapes propagated."""
    module, inputs = get_torch_module_and_inputs(model_path)
    model = parse_sole_graph_module(module, inputs)
    # Shape propagation only annotates fx nodes; no gradients are needed.
    with torch.no_grad():
        ShapeProp(model).propagate(*inputs)
    return model


def read_unconverted_api(rel_model_path, summary_file):
    """Return the recorded unsupported-API list for one sample.

    Fix: the original crashed with UnboundLocalError when summary_file was
    missing (it printed and then read the never-assigned `all_data`), and
    with KeyError when the sample had no entry. Both now yield [].
    """
    try:
        with open(summary_file, "r", encoding="utf-8") as json_f:
            all_data = json.load(json_f)
    except FileNotFoundError:
        print(f"Not found: {summary_file}")
        return []
    return all_data.get(rel_model_path, {}).get("api_unsupported_list", [])


def gm_unstable_to_stable(gm, unconverted_api):
    """Rewrite known unstable torch APIs in `gm`.

    Returns:
        (gm, leftovers): the rewritten graph module and the APIs for which no
        unstable-to-stable rewrite is registered.
    """
    converter = UnstableToStableBackend({})
    new_unconverted_api = []
    for api in unconverted_api:
        converter_method_name = API_UNSTABLE_TO_STABLE.get(api)
        if converter_method_name is not None:
            gm = getattr(converter, converter_method_name)(gm)
        else:
            new_unconverted_api.append(api)
    return gm, new_unconverted_api


def save_gm_to_model_py(gm, output_dir):
    """Serialize `gm` to <output_dir>/model_unstable_to_stable.py."""
    model_code = serialize_graph_module_to_str(gm)
    write_code = utils.apply_templates(model_code)

    output_model_py = os.path.join(output_dir, "model_unstable_to_stable.py")
    Path(output_model_py).parent.mkdir(parents=True, exist_ok=True)
    with open(output_model_py, "w") as f:
        f.write(write_code)


def convert_api_from_unstable_to_stable(
    rel_model_path, model_path, output_dir, summary_dir
):
    """Rewrite unstable APIs in one sample's graph and refresh its summary."""
    summary_file = Path(os.path.join(summary_dir, "summary.json"))
    unconverted_api = read_unconverted_api(rel_model_path, summary_file)
    gm = get_gm_from_model_path(model_path)
    gm_modified, new_unconverted_api = gm_unstable_to_stable(gm, unconverted_api)
    save_gm_to_model_py(gm_modified, output_dir)

    # Release the graphs before the next sample to keep peak memory bounded.
    del gm
    del gm_modified
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    save_unconverted_api(rel_model_path, new_unconverted_api, summary_file)


def remove_string_from_model(input_file, target_string):
    """Delete every occurrence of `target_string` from `input_file` in place."""
    if not os.path.exists(input_file):
        print(f"Error: Not found {input_file}")
        return

    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()

        if target_string not in content:
            return  # nothing to strip

        with open(input_file, "w", encoding="utf-8") as f:
            f.write(content.replace(target_string, ""))

    except OSError as e:  # narrowed from a blanket `except Exception`
        print(f"Error: {e}")


def convert_model_py(model_path, output_dir, log_dir):
    """Convert the unstable-to-stable model file to paddle via paconvert.

    paconvert marks untranslated spots with ">>>>>>"; strip the markers so
    the resulting model.py stays importable.
    """
    input_model_py = Path(os.path.join(model_path, "model_unstable_to_stable.py"))
    output_model_py = Path(os.path.join(output_dir, "model.py"))
    output_log = os.path.join(log_dir, "conversion.log")
    Path(output_log).parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        "paconvert",
        "-i",
        input_model_py,
        "-o",
        output_model_py,
        "--log_dir",
        output_log,
        "--show_unsupport_api",
    ]
    execute_paconvert(cmd)
    remove_string_from_model(output_model_py, ">>>>>>")
def convert_weight_meta_py(model_path, output_dir):
    """Copy weight_meta.py, rewriting dtype strings from torch.* to paddle.*."""
    input_file = os.path.join(model_path, "weight_meta.py")
    output_file = os.path.join(output_dir, "weight_meta.py")

    if not os.path.exists(input_file):
        print(f"[Error] Not found: {input_file}")
        return

    # Only rewrite the framework prefix inside dtype string literals,
    # e.g. dtype="torch.float32" -> dtype="paddle.float32".
    pattern = r"(dtype\s*=\s*['\"])torch(?=.*['\"])"
    replacement = r"\1paddle"

    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(re.sub(pattern, replacement, content))
    except OSError as e:  # narrowed from a blanket `except Exception`
        print(f"Error: {e}")


def convert_graph_net_json(model_path, output_dir):
    """Copy graph_net.json, flipping its "framework" field to paddle."""
    input_file = os.path.join(model_path, "graph_net.json")
    output_file = os.path.join(output_dir, "graph_net.json")

    if not os.path.exists(input_file):
        print(f"Error: {input_file} not found.")
        return

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    if data.get("framework") == "torch":
        data["framework"] = "paddle"

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)


def copy_sample_files(model_path, output_dir, files_copied):
    """Copy the listed sample files verbatim; warn on (not crash for) missing ones.

    Fix: shutil.copy raised FileNotFoundError for any absent optional file,
    aborting the whole batch.
    """
    for fname in files_copied:
        input_file = os.path.join(model_path, fname)
        if not os.path.exists(input_file):
            print(f"[Warn] Not found, skipped: {input_file}")
            continue
        shutil.copy(input_file, os.path.join(output_dir, fname))


def convert_other_files(model_path, output_dir):
    """Copy the sample files that need no content conversion."""
    files_copied = ["input_meta.py", "input_tensor_constraints.py", "graph_hash.txt"]
    copy_sample_files(model_path, output_dir, files_copied)


def convert_sample_from_torch_to_paddle(model_path, output_dir, log_dir):
    """Convert one full sample directory from torch to paddle.

    NOTE: model.py is converted from `output_dir`, not `model_path`, because
    the unstable-to-stable pass has already written
    model_unstable_to_stable.py into `output_dir`.
    """
    convert_model_py(output_dir, output_dir, log_dir)
    convert_weight_meta_py(model_path, output_dir)
    convert_graph_net_json(model_path, output_dir)
    convert_other_files(model_path, output_dir)


def get_model_path_list(model_path_list):
    """Yield non-empty, non-comment lines from the model-path list file.

    Raises:
        ValueError: if no list file path was supplied (`assert` is stripped
        under `python -O`, so validate explicitly).
    """
    if model_path_list is None:
        raise ValueError("model_path_list must not be None")
    with open(model_path_list) as f:
        for line in f:
            clean = line.strip()
            if clean and not clean.startswith("#"):
                yield clean
def execute_paconvert(command):
    """Run one paconvert command line, reporting (never raising) failures."""
    try:
        subprocess.run(command, check=True)
        print("Convert successfully")
    except subprocess.CalledProcessError as e:
        print(f"Convert failed: {e}")
    except FileNotFoundError:
        # Fix: was the ungrammatical "Error: File not be found."
        print("Error: The paconvert executable could not be found.")


def get_convert_log(model_path, log_dir):
    """Run paconvert on a sample's model.py just to produce its conversion log."""
    input_file = os.path.join(model_path, "model.py")
    log_file = os.path.join(log_dir, "conversion.log")
    Path(log_file).parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): --log_dir receives the conversion.log *file* path here —
    # confirm paconvert accepts a file path for this flag.
    cmd = [
        "paconvert",
        "-i",
        input_file,
        "--log_dir",
        log_file,
        "--show_unsupport_api",
    ]

    execute_paconvert(cmd)


def filter_unconverted_api(log_file):
    """Return the torch.* API names the conversion log marks as unconverted."""
    unconverted_api = []
    try:
        with open(log_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Unsupported-API report lines start with the full torch path.
                if line.startswith("torch."):
                    unconverted_api.append(line.split()[0])
    except FileNotFoundError:
        print(f"Not found: {log_file}")

    return unconverted_api


def _load_summary(summary_file):
    """Read summary.json, creating an empty one when it does not exist yet."""
    summary_file.parent.mkdir(parents=True, exist_ok=True)
    if not summary_file.exists():
        with open(summary_file, "w", encoding="utf-8") as f:
            json.dump({}, f)
        return {}
    with open(summary_file, "r", encoding="utf-8") as f:
        return json.load(f)


def save_unconverted_api(rel_model_path, unconverted_api, summary_file):
    """Store a sample's unconverted-API list under its path key in summary.json."""
    all_data = _load_summary(summary_file)
    all_data[rel_model_path] = {"api_unsupported_list": unconverted_api}
    with open(summary_file, "w", encoding="utf-8") as json_f:
        json.dump(all_data, json_f, indent=4, ensure_ascii=False)


def filter_and_save_unconverted_api(rel_model_path, log_dir, summary_dir):
    """Extract the unconverted APIs from a sample's log and persist them."""
    log_file = Path(os.path.join(log_dir, "conversion.log"))
    summary_file = Path(os.path.join(summary_dir, "summary.json"))
    save_unconverted_api(
        rel_model_path, filter_unconverted_api(log_file), summary_file
    )


def get_api_convert_rate(log_file):
    """Return the percentage string from the log's 'Convert Rate is: X%' line.

    Returns None when the log is missing or holds no rate line.
    """
    try:
        with open(log_file, "r", encoding="utf-8") as f:
            for line in f:
                match = re.search(r"Convert Rate is:\s*(\d+\.?\d*)%", line)
                if match:
                    return match.group(1)
    except FileNotFoundError:
        print(f"Not found: {log_file}")


def save_api_convert_rate(rel_model_path, summary_file, api_convert_rate):
    """Attach the convert rate to a sample's entry in summary.json.

    Fix: the original opened summary.json for reading without an existence
    guard (FileNotFoundError) and indexed the sample entry directly
    (KeyError); both file and entry are now created on demand.
    """
    all_data = _load_summary(summary_file)
    all_data.setdefault(rel_model_path, {})["api_convert_rate"] = api_convert_rate
    with open(summary_file, "w", encoding="utf-8") as json_f:
        json.dump(all_data, json_f, indent=4, ensure_ascii=False)


def save_sample_api_convert_rate(rel_model_path, log_dir, summary_dir):
    """Read a sample's conversion log and record its API convert rate."""
    summary_file = Path(os.path.join(summary_dir, "summary.json"))
    log_file = Path(os.path.join(log_dir, "conversion.log"))
    save_api_convert_rate(rel_model_path, summary_file, get_api_convert_rate(log_file))