Changes
On August 9, 2022 at 3:57:38 PM UTC, Alp Öktem:
- Added resource "Ladino - Turkish" to "Synthetic parallel corpora LAD-EN, TR, ES"
f | 1 | { | f | 1 | { |
2 | "author": "", | 2 | "author": "", | ||
3 | "author_email": "", | 3 | "author_email": "", | ||
4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | 4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | ||
5 | "extras": [ | 5 | "extras": [ | ||
6 | { | 6 | { | ||
7 | "key": "image_url", | 7 | "key": "image_url", | ||
8 | "value": "" | 8 | "value": "" | ||
9 | } | 9 | } | ||
10 | ], | 10 | ], | ||
11 | "groups": [], | 11 | "groups": [], | ||
12 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | 12 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
13 | "isopen": true, | 13 | "isopen": true, | ||
14 | "license_id": "cc-by", | 14 | "license_id": "cc-by", | ||
15 | "license_title": "Creative Commons Attribution", | 15 | "license_title": "Creative Commons Attribution", | ||
16 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | 16 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | ||
17 | "maintainer": "", | 17 | "maintainer": "", | ||
18 | "maintainer_email": "", | 18 | "maintainer_email": "", | ||
19 | "metadata_created": "2022-08-09T15:56:40.059317", | 19 | "metadata_created": "2022-08-09T15:56:40.059317", | ||
n | 20 | "metadata_modified": "2022-08-09T15:57:05.908168", | n | 20 | "metadata_modified": "2022-08-09T15:57:38.623225", |
21 | "name": "synthetic-parallel-data", | 21 | "name": "synthetic-parallel-data", | ||
22 | "notes": "Synthetically produced parallel data using rule-based | 22 | "notes": "Synthetically produced parallel data using rule-based | ||
23 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Spanish: | 23 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Spanish: | ||
24 | 10,322,033 sentences\r\n\r\nLadino-Turkish: 4,574,021 | 24 | 10,322,033 sentences\r\n\r\nLadino-Turkish: 4,574,021 | ||
25 | sentences\r\n\r\nLadino-English: 5,748,012 sentences\r\n\r\nPaper: | 25 | sentences\r\n\r\nLadino-English: 5,748,012 sentences\r\n\r\nPaper: | ||
26 | https://arxiv.org/abs/2205.15599\r\n\r\nThis dataset is created as | 26 | https://arxiv.org/abs/2205.15599\r\n\r\nThis dataset is created as | ||
27 | part of project \"Judeo-Spanish: Connecting the two ends of the | 27 | part of project \"Judeo-Spanish: Connecting the two ends of the | ||
28 | Mediterranean\" carried out by Col\u00b7lectivaT and Sephardic Center | 28 | Mediterranean\" carried out by Col\u00b7lectivaT and Sephardic Center | ||
29 | of Istanbul within the framework of the \u201cGrant Scheme for Common | 29 | of Istanbul within the framework of the \u201cGrant Scheme for Common | ||
30 | Cultural Heritage: Preservation and Dialogue between Turkey and the | 30 | Cultural Heritage: Preservation and Dialogue between Turkey and the | ||
31 | EU\u2013II (CCH-II)\u201d implemented by the Ministry of Culture and | 31 | EU\u2013II (CCH-II)\u201d implemented by the Ministry of Culture and | ||
32 | Tourism of the Republic of Turkey with the financial support of the | 32 | Tourism of the Republic of Turkey with the financial support of the | ||
33 | European Union. The content of this website is the sole responsibility | 33 | European Union. The content of this website is the sole responsibility | ||
34 | of Col\u00b7lectivaT and does not necessarily reflect the views of the | 34 | of Col\u00b7lectivaT and does not necessarily reflect the views of the | ||
35 | European Union.", | 35 | European Union.", | ||
n | 36 | "num_resources": 1, | n | 36 | "num_resources": 2, |
37 | "num_tags": 1, | 37 | "num_tags": 1, | ||
38 | "organization": { | 38 | "organization": { | ||
39 | "approval_status": "approved", | 39 | "approval_status": "approved", | ||
40 | "created": "2022-08-09T08:55:19.044966", | 40 | "created": "2022-08-09T08:55:19.044966", | ||
41 | "description": "Col\u00b7lectivaT is a non-profit cooperative | 41 | "description": "Col\u00b7lectivaT is a non-profit cooperative | ||
42 | formed by knowledge workers that provides all-around services of | 42 | formed by knowledge workers that provides all-around services of | ||
43 | cultural translation, research and technological services for | 43 | cultural translation, research and technological services for | ||
44 | collaborative and linguistic work.", | 44 | collaborative and linguistic work.", | ||
45 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | 45 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
46 | "image_url": "https://collectivat.cat/img/logo_sm.png", | 46 | "image_url": "https://collectivat.cat/img/logo_sm.png", | ||
47 | "is_organization": true, | 47 | "is_organization": true, | ||
48 | "name": "col-lectivat", | 48 | "name": "col-lectivat", | ||
49 | "state": "active", | 49 | "state": "active", | ||
50 | "title": "Col\u00b7lectivaT", | 50 | "title": "Col\u00b7lectivaT", | ||
51 | "type": "organization" | 51 | "type": "organization" | ||
52 | }, | 52 | }, | ||
53 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | 53 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
54 | "private": false, | 54 | "private": false, | ||
55 | "relationships_as_object": [], | 55 | "relationships_as_object": [], | ||
56 | "relationships_as_subject": [], | 56 | "relationships_as_subject": [], | ||
57 | "resources": [ | 57 | "resources": [ | ||
58 | { | 58 | { | ||
59 | "cache_last_updated": null, | 59 | "cache_last_updated": null, | ||
60 | "cache_url": null, | 60 | "cache_url": null, | ||
61 | "created": "2022-08-09T15:57:05.916509", | 61 | "created": "2022-08-09T15:57:05.916509", | ||
62 | "description": "", | 62 | "description": "", | ||
63 | "format": "ZIP", | 63 | "format": "ZIP", | ||
64 | "hash": "", | 64 | "hash": "", | ||
65 | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | 65 | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | ||
66 | "last_modified": null, | 66 | "last_modified": null, | ||
67 | "metadata_modified": "2022-08-09T15:57:05.911158", | 67 | "metadata_modified": "2022-08-09T15:57:05.911158", | ||
68 | "mimetype": "application/zip", | 68 | "mimetype": "application/zip", | ||
69 | "mimetype_inner": null, | 69 | "mimetype_inner": null, | ||
70 | "name": "Ladino - English", | 70 | "name": "Ladino - English", | ||
71 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | 71 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
72 | "position": 0, | 72 | "position": 0, | ||
73 | "resource_type": null, | 73 | "resource_type": null, | ||
74 | "size": null, | 74 | "size": null, | ||
75 | "state": "active", | 75 | "state": "active", | ||
76 | "url": | 76 | "url": | ||
77 | "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", | 77 | "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", | ||
78 | "url_type": null | 78 | "url_type": null | ||
t | t | 79 | }, | ||
80 | { | ||||
81 | "cache_last_updated": null, | ||||
82 | "cache_url": null, | ||||
83 | "created": "2022-08-09T15:57:38.631681", | ||||
84 | "description": "", | ||||
85 | "format": "ZIP", | ||||
86 | "hash": "", | ||||
87 | "id": "bac5f97b-9054-4ffe-9d8f-0022af74a508", | ||||
88 | "last_modified": null, | ||||
89 | "metadata_modified": "2022-08-09T15:57:38.626254", | ||||
90 | "mimetype": "application/zip", | ||||
91 | "mimetype_inner": null, | ||||
92 | "name": "Ladino - Turkish", | ||||
93 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||||
94 | "position": 1, | ||||
95 | "resource_type": null, | ||||
96 | "size": null, | ||||
97 | "state": "active", | ||||
98 | "url": | ||||
99 | "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_tr.csv.zip", | ||||
100 | "url_type": null | ||||
79 | } | 101 | } | ||
80 | ], | 102 | ], | ||
81 | "state": "draft", | 103 | "state": "draft", | ||
82 | "tags": [ | 104 | "tags": [ | ||
83 | { | 105 | { | ||
84 | "display_name": "parallel text", | 106 | "display_name": "parallel text", | ||
85 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | 107 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | ||
86 | "name": "parallel text", | 108 | "name": "parallel text", | ||
87 | "state": "active", | 109 | "state": "active", | ||
88 | "vocabulary_id": null | 110 | "vocabulary_id": null | ||
89 | } | 111 | } | ||
90 | ], | 112 | ], | ||
91 | "title": "Synthetic parallel corpora LAD-EN, TR, ES", | 113 | "title": "Synthetic parallel corpora LAD-EN, TR, ES", | ||
92 | "type": "dataset", | 114 | "type": "dataset", | ||
93 | "url": "", | 115 | "url": "", | ||
94 | "version": "" | 116 | "version": "" | ||
95 | } | 117 | } |