f | { | f | { |
| "author": "", | | "author": "", |
| "author_email": "", | | "author_email": "", |
| "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", |
| "extras": [ | | "extras": [ |
| { | | { |
| "key": "image_url", | | "key": "image_url", |
| "value": "" | | "value": "" |
| } | | } |
| ], | | ], |
| "groups": [], | | "groups": [], |
| "id": "5801010b-181a-45f2-b3d2-05861953ac63", | | "id": "5801010b-181a-45f2-b3d2-05861953ac63", |
| "isopen": true, | | "isopen": true, |
| "license_id": "cc-by", | | "license_id": "cc-by", |
| "license_title": "Creative Commons Attribution", | | "license_title": "Creative Commons Attribution", |
| "license_url": "http://www.opendefinition.org/licenses/cc-by", | | "license_url": "http://www.opendefinition.org/licenses/cc-by", |
| "maintainer": "", | | "maintainer": "", |
| "maintainer_email": "", | | "maintainer_email": "", |
| "metadata_created": "2022-08-09T15:56:40.059317", | | "metadata_created": "2022-08-09T15:56:40.059317", |
n | "metadata_modified": "2022-08-09T15:57:38.727511", | n | "metadata_modified": "2022-08-09T15:58:31.481725", |
| "name": "synthetic-parallel-data", | | "name": "synthetic-parallel-data", |
| "notes": "Synthetically produced parallel data using rule-based | | "notes": "Synthetically produced parallel data using rule-based |
n | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Spanish: | n | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Turkish: |
| 10,322,033 sentences\r\n\r\nLadino-Turkish: 4,574,021 | | 4,574,021 sentences\r\n\r\nLadino-English: 5,748,012 |
| sentences\r\n\r\nLadino-English: 5,748,012 sentences\r\n\r\nPaper: | | sentences\r\n\r\nTotal Ladino-Spanish: 10,322,033 sentences (This is |
| | | basically a combination of the two corpora)\r\n\r\nPaper: |
| https://arxiv.org/abs/2205.15599\r\n\r\nThis dataset is created as | | https://arxiv.org/abs/2205.15599\r\n\r\nThis dataset is created as |
| part of project \"Judeo-Spanish: Connecting the two ends of the | | part of project \"Judeo-Spanish: Connecting the two ends of the |
| Mediterranean\" carried out by Col\u00b7lectivaT and Sephardic Center | | Mediterranean\" carried out by Col\u00b7lectivaT and Sephardic Center |
| of Istanbul within the framework of the \u201cGrant Scheme for Common | | of Istanbul within the framework of the \u201cGrant Scheme for Common |
| Cultural Heritage: Preservation and Dialogue between Turkey and the | | Cultural Heritage: Preservation and Dialogue between Turkey and the |
| EU\u2013II (CCH-II)\u201d implemented by the Ministry of Culture and | | EU\u2013II (CCH-II)\u201d implemented by the Ministry of Culture and |
| Tourism of the Republic of Turkey with the financial support of the | | Tourism of the Republic of Turkey with the financial support of the |
| European Union. The content of this website is the sole responsibility | | European Union. The content of this website is the sole responsibility |
| of Col\u00b7lectivaT and does not necessarily reflect the views of the | | of Col\u00b7lectivaT and does not necessarily reflect the views of the |
| European Union.", | | European Union.", |
| "num_resources": 2, | | "num_resources": 2, |
| "num_tags": 1, | | "num_tags": 1, |
| "organization": { | | "organization": { |
| "approval_status": "approved", | | "approval_status": "approved", |
| "created": "2022-08-09T08:55:19.044966", | | "created": "2022-08-09T08:55:19.044966", |
| "description": "Col\u00b7lectivaT is a non-profit cooperative | | "description": "Col\u00b7lectivaT is a non-profit cooperative |
| formed by knowledge workers that provides all-around services of | | formed by knowledge workers that provides all-around services of |
| cultural translation, research and technological services for | | cultural translation, research and technological services for |
| collaborative and linguistic work.", | | collaborative and linguistic work.", |
| "id": "0510debc-4a80-4144-b198-def8e40d28d9", | | "id": "0510debc-4a80-4144-b198-def8e40d28d9", |
| "image_url": "https://collectivat.cat/img/logo_sm.png", | | "image_url": "https://collectivat.cat/img/logo_sm.png", |
| "is_organization": true, | | "is_organization": true, |
| "name": "col-lectivat", | | "name": "col-lectivat", |
| "state": "active", | | "state": "active", |
| "title": "Col\u00b7lectivaT", | | "title": "Col\u00b7lectivaT", |
| "type": "organization" | | "type": "organization" |
| }, | | }, |
| "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", |
| "private": false, | | "private": false, |
| "relationships_as_object": [], | | "relationships_as_object": [], |
| "relationships_as_subject": [], | | "relationships_as_subject": [], |
| "resources": [ | | "resources": [ |
| { | | { |
| "cache_last_updated": null, | | "cache_last_updated": null, |
| "cache_url": null, | | "cache_url": null, |
| "created": "2022-08-09T15:57:05.916509", | | "created": "2022-08-09T15:57:05.916509", |
| "description": "", | | "description": "", |
| "format": "ZIP", | | "format": "ZIP", |
| "hash": "", | | "hash": "", |
| "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", |
| "last_modified": null, | | "last_modified": null, |
| "metadata_modified": "2022-08-09T15:57:05.911158", | | "metadata_modified": "2022-08-09T15:57:05.911158", |
| "mimetype": "application/zip", | | "mimetype": "application/zip", |
| "mimetype_inner": null, | | "mimetype_inner": null, |
| "name": "Ladino - English", | | "name": "Ladino - English", |
| "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", |
| "position": 0, | | "position": 0, |
| "resource_type": null, | | "resource_type": null, |
| "size": null, | | "size": null, |
| "state": "active", | | "state": "active", |
| "url": | | "url": |
| "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", | | "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", |
| "url_type": null | | "url_type": null |
| }, | | }, |
| { | | { |
| "cache_last_updated": null, | | "cache_last_updated": null, |
| "cache_url": null, | | "cache_url": null, |
| "created": "2022-08-09T15:57:38.631681", | | "created": "2022-08-09T15:57:38.631681", |
| "description": "", | | "description": "", |
| "format": "ZIP", | | "format": "ZIP", |
| "hash": "", | | "hash": "", |
| "id": "bac5f97b-9054-4ffe-9d8f-0022af74a508", | | "id": "bac5f97b-9054-4ffe-9d8f-0022af74a508", |
| "last_modified": null, | | "last_modified": null, |
| "metadata_modified": "2022-08-09T15:57:38.626254", | | "metadata_modified": "2022-08-09T15:57:38.626254", |
| "mimetype": "application/zip", | | "mimetype": "application/zip", |
| "mimetype_inner": null, | | "mimetype_inner": null, |
| "name": "Ladino - Turkish", | | "name": "Ladino - Turkish", |
| "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", |
| "position": 1, | | "position": 1, |
| "resource_type": null, | | "resource_type": null, |
| "size": null, | | "size": null, |
| "state": "active", | | "state": "active", |
| "url": | | "url": |
| "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_tr.csv.zip", | | "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_tr.csv.zip", |
| "url_type": null | | "url_type": null |
| } | | } |
| ], | | ], |
| "state": "active", | | "state": "active", |
| "tags": [ | | "tags": [ |
| { | | { |
| "display_name": "parallel text", | | "display_name": "parallel text", |
| "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", |
| "name": "parallel text", | | "name": "parallel text", |
| "state": "active", | | "state": "active", |
| "vocabulary_id": null | | "vocabulary_id": null |
| } | | } |
| ], | | ], |
t | "title": "Synthetic parallel corpora LAD-EN, TR, ES", | t | "title": "Synthetic parallel corpora LAD-EN, TR", |
| "type": "dataset", | | "type": "dataset", |
| "url": "", | | "url": "", |
| "version": "" | | "version": "" |
| } | | } |